{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.98631074606434, "eval_steps": 500, "global_step": 7300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0027378507871321013, "grad_norm": 3.2654504776000977, "learning_rate": 9.998630136986302e-07, "log_odds_chosen": -0.5106310248374939, "log_odds_ratio": -1.2047576904296875, "logits/chosen": -0.9636868238449097, "logits/rejected": -1.0195813179016113, "logps/chosen": -3.0578722953796387, "logps/rejected": -2.4471054077148438, "loss": 2.5752, "nll_loss": 2.4547178745269775, "rewards/accuracies": 0.5, "rewards/chosen": -0.30578720569610596, "rewards/margins": -0.06107669696211815, "rewards/rejected": -0.2447105348110199, "step": 1 }, { "epoch": 0.0054757015742642025, "grad_norm": 3.0847229957580566, "learning_rate": 9.997260273972602e-07, "log_odds_chosen": -0.7200413942337036, "log_odds_ratio": -1.2510905265808105, "logits/chosen": -1.06812584400177, "logits/rejected": -1.059982419013977, "logps/chosen": -3.471505880355835, "logps/rejected": -2.763831853866577, "loss": 2.6536, "nll_loss": 2.5284852981567383, "rewards/accuracies": 0.375, "rewards/chosen": -0.34715062379837036, "rewards/margins": -0.07076742500066757, "rewards/rejected": -0.2763831913471222, "step": 2 }, { "epoch": 0.008213552361396304, "grad_norm": 3.896925210952759, "learning_rate": 9.995890410958904e-07, "log_odds_chosen": -1.8114101886749268, "log_odds_ratio": -2.138590097427368, "logits/chosen": -0.9706584811210632, "logits/rejected": -0.9272528886795044, "logps/chosen": -5.037758827209473, "logps/rejected": -3.2529661655426025, "loss": 2.8889, "nll_loss": 2.675076961517334, "rewards/accuracies": 0.25, "rewards/chosen": -0.5037758350372314, "rewards/margins": -0.17847928404808044, "rewards/rejected": -0.3252966105937958, "step": 3 }, { "epoch": 0.010951403148528405, "grad_norm": 3.5043997764587402, "learning_rate": 9.994520547945206e-07, "log_odds_chosen": -1.295171856880188, "log_odds_ratio": -1.721024990081787, "logits/chosen": -0.8957974910736084, "logits/rejected": -0.8411136865615845, "logps/chosen": -3.890214443206787, "logps/rejected": -2.6046042442321777, "loss": 2.7792, "nll_loss": 2.6071395874023438, "rewards/accuracies": 0.25, "rewards/chosen": -0.3890214264392853, "rewards/margins": -0.12856099009513855, "rewards/rejected": -0.26046043634414673, "step": 4 }, { "epoch": 0.013689253935660506, "grad_norm": 3.4481265544891357, "learning_rate": 9.993150684931506e-07, "log_odds_chosen": -0.6519434452056885, "log_odds_ratio": -1.5155668258666992, "logits/chosen": -1.0442171096801758, "logits/rejected": -1.0643937587738037, "logps/chosen": -3.8943912982940674, "logps/rejected": -3.213207483291626, "loss": 2.6601, "nll_loss": 2.5085225105285645, "rewards/accuracies": 0.5, "rewards/chosen": -0.3894391357898712, "rewards/margins": -0.0681183934211731, "rewards/rejected": -0.3213207423686981, "step": 5 }, { "epoch": 0.01642710472279261, "grad_norm": 3.3164660930633545, "learning_rate": 9.991780821917808e-07, "log_odds_chosen": -0.1983257532119751, "log_odds_ratio": -0.9420785307884216, "logits/chosen": -1.0931787490844727, "logits/rejected": -1.0503027439117432, "logps/chosen": -2.9250640869140625, "logps/rejected": -2.7318553924560547, "loss": 2.6546, "nll_loss": 2.5603904724121094, "rewards/accuracies": 0.625, "rewards/chosen": -0.2925063967704773, "rewards/margins": -0.01932084932923317, "rewards/rejected": -0.2731855511665344, "step": 6 }, { "epoch": 0.019164955509924708, "grad_norm": 3.2535769939422607, "learning_rate": 9.99041095890411e-07, "log_odds_chosen": -0.49844834208488464, "log_odds_ratio": -1.1599137783050537, "logits/chosen": -0.9192939400672913, "logits/rejected": -0.9663747549057007, "logps/chosen": -3.2904181480407715, "logps/rejected": -2.770775079727173, "loss": 2.5813, "nll_loss": 2.4652798175811768, "rewards/accuracies": 0.375, "rewards/chosen": -0.32904180884361267, "rewards/margins": -0.051964305341243744, "rewards/rejected": -0.27707749605178833, "step": 7 }, { "epoch": 0.02190280629705681, "grad_norm": 2.556321620941162, "learning_rate": 9.98904109589041e-07, "log_odds_chosen": 0.2481449693441391, "log_odds_ratio": -0.7001024484634399, "logits/chosen": -0.8446630239486694, "logits/rejected": -0.9190036654472351, "logps/chosen": -2.191213607788086, "logps/rejected": -2.3767471313476562, "loss": 2.4274, "nll_loss": 2.357347249984741, "rewards/accuracies": 0.625, "rewards/chosen": -0.21912136673927307, "rewards/margins": 0.018553361296653748, "rewards/rejected": -0.237674742937088, "step": 8 }, { "epoch": 0.024640657084188913, "grad_norm": 3.542386770248413, "learning_rate": 9.987671232876712e-07, "log_odds_chosen": -0.8172507286071777, "log_odds_ratio": -1.391730785369873, "logits/chosen": -1.0588490962982178, "logits/rejected": -1.0214717388153076, "logps/chosen": -4.688696384429932, "logps/rejected": -3.8786959648132324, "loss": 2.7813, "nll_loss": 2.6421139240264893, "rewards/accuracies": 0.5, "rewards/chosen": -0.4688696265220642, "rewards/margins": -0.08100004494190216, "rewards/rejected": -0.38786959648132324, "step": 9 }, { "epoch": 0.02737850787132101, "grad_norm": 2.93650484085083, "learning_rate": 9.986301369863014e-07, "log_odds_chosen": -0.5240076184272766, "log_odds_ratio": -1.0760942697525024, "logits/chosen": -0.8678291440010071, "logits/rejected": -0.9119645357131958, "logps/chosen": -3.0213963985443115, "logps/rejected": -2.5281379222869873, "loss": 2.5387, "nll_loss": 2.4311065673828125, "rewards/accuracies": 0.25, "rewards/chosen": -0.30213966965675354, "rewards/margins": -0.049325861036777496, "rewards/rejected": -0.25281381607055664, "step": 10 }, { "epoch": 0.030116358658453114, "grad_norm": 3.032290458679199, "learning_rate": 9.984931506849314e-07, "log_odds_chosen": -0.23769843578338623, "log_odds_ratio": -0.9971656799316406, "logits/chosen": -0.9383009672164917, "logits/rejected": -0.9241993427276611, "logps/chosen": -2.7974982261657715, "logps/rejected": -2.5075368881225586, "loss": 2.4842, "nll_loss": 2.384490489959717, "rewards/accuracies": 0.5, "rewards/chosen": -0.2797498106956482, "rewards/margins": -0.02899612858891487, "rewards/rejected": -0.2507537007331848, "step": 11 }, { "epoch": 0.03285420944558522, "grad_norm": 3.594911575317383, "learning_rate": 9.983561643835616e-07, "log_odds_chosen": -1.2594561576843262, "log_odds_ratio": -1.8575801849365234, "logits/chosen": -1.0133650302886963, "logits/rejected": -0.894416332244873, "logps/chosen": -4.0420026779174805, "logps/rejected": -2.800290584564209, "loss": 2.8281, "nll_loss": 2.642357110977173, "rewards/accuracies": 0.25, "rewards/chosen": -0.40420031547546387, "rewards/margins": -0.1241711974143982, "rewards/rejected": -0.2800290882587433, "step": 12 }, { "epoch": 0.03559206023271732, "grad_norm": 2.835012912750244, "learning_rate": 9.982191780821918e-07, "log_odds_chosen": 0.3601546287536621, "log_odds_ratio": -0.5658332705497742, "logits/chosen": -0.9747136831283569, "logits/rejected": -1.0202211141586304, "logps/chosen": -2.5146713256835938, "logps/rejected": -2.851346731185913, "loss": 2.4284, "nll_loss": 2.3718044757843018, "rewards/accuracies": 0.75, "rewards/chosen": -0.25146713852882385, "rewards/margins": 0.03366754204034805, "rewards/rejected": -0.2851346731185913, "step": 13 }, { "epoch": 0.038329911019849415, "grad_norm": 3.4584782123565674, "learning_rate": 9.980821917808218e-07, "log_odds_chosen": -0.7127408981323242, "log_odds_ratio": -1.2800359725952148, "logits/chosen": -1.029188871383667, "logits/rejected": -1.0018490552902222, "logps/chosen": -3.572725772857666, "logps/rejected": -2.831144332885742, "loss": 2.679, "nll_loss": 2.5510425567626953, "rewards/accuracies": 0.25, "rewards/chosen": -0.3572726249694824, "rewards/margins": -0.07415817677974701, "rewards/rejected": -0.2831144332885742, "step": 14 }, { "epoch": 0.04106776180698152, "grad_norm": 3.591355323791504, "learning_rate": 9.97945205479452e-07, "log_odds_chosen": -0.7708600759506226, "log_odds_ratio": -1.6791977882385254, "logits/chosen": -1.0173054933547974, "logits/rejected": -0.9782910346984863, "logps/chosen": -3.8062026500701904, "logps/rejected": -2.979600191116333, "loss": 2.7646, "nll_loss": 2.596719741821289, "rewards/accuracies": 0.625, "rewards/chosen": -0.38062024116516113, "rewards/margins": -0.08266022801399231, "rewards/rejected": -0.2979600429534912, "step": 15 }, { "epoch": 0.04380561259411362, "grad_norm": 2.914293050765991, "learning_rate": 9.978082191780822e-07, "log_odds_chosen": 0.13335683941841125, "log_odds_ratio": -0.7549812197685242, "logits/chosen": -0.9615088105201721, "logits/rejected": -1.0107661485671997, "logps/chosen": -2.9873759746551514, "logps/rejected": -3.0672965049743652, "loss": 2.5531, "nll_loss": 2.4775803089141846, "rewards/accuracies": 0.625, "rewards/chosen": -0.2987375855445862, "rewards/margins": 0.00799208227545023, "rewards/rejected": -0.30672967433929443, "step": 16 }, { "epoch": 0.04654346338124572, "grad_norm": 3.5791068077087402, "learning_rate": 9.976712328767124e-07, "log_odds_chosen": -0.7579538822174072, "log_odds_ratio": -1.298214316368103, "logits/chosen": -1.0131895542144775, "logits/rejected": -0.9920966029167175, "logps/chosen": -4.014985084533691, "logps/rejected": -3.26096248626709, "loss": 2.7993, "nll_loss": 2.669471263885498, "rewards/accuracies": 0.375, "rewards/chosen": -0.4014985263347626, "rewards/margins": -0.07540225237607956, "rewards/rejected": -0.32609623670578003, "step": 17 }, { "epoch": 0.049281314168377825, "grad_norm": 3.1257638931274414, "learning_rate": 9.975342465753424e-07, "log_odds_chosen": -0.10427369177341461, "log_odds_ratio": -0.9216517806053162, "logits/chosen": -0.8576212525367737, "logits/rejected": -0.9107890129089355, "logps/chosen": -2.741581916809082, "logps/rejected": -2.5708892345428467, "loss": 2.5641, "nll_loss": 2.4719605445861816, "rewards/accuracies": 0.5, "rewards/chosen": -0.27415820956230164, "rewards/margins": -0.017069285735487938, "rewards/rejected": -0.25708889961242676, "step": 18 }, { "epoch": 0.05201916495550993, "grad_norm": 2.9874565601348877, "learning_rate": 9.973972602739726e-07, "log_odds_chosen": -0.36761170625686646, "log_odds_ratio": -1.0936015844345093, "logits/chosen": -0.9818375110626221, "logits/rejected": -1.1630520820617676, "logps/chosen": -3.3724007606506348, "logps/rejected": -2.9565560817718506, "loss": 2.5272, "nll_loss": 2.41787052154541, "rewards/accuracies": 0.5, "rewards/chosen": -0.337240070104599, "rewards/margins": -0.041584476828575134, "rewards/rejected": -0.29565560817718506, "step": 19 }, { "epoch": 0.05475701574264202, "grad_norm": 2.877781629562378, "learning_rate": 9.972602739726028e-07, "log_odds_chosen": -0.024758145213127136, "log_odds_ratio": -0.8487261533737183, "logits/chosen": -0.871157705783844, "logits/rejected": -0.9183536767959595, "logps/chosen": -2.5682857036590576, "logps/rejected": -2.505056142807007, "loss": 2.3829, "nll_loss": 2.2980480194091797, "rewards/accuracies": 0.625, "rewards/chosen": -0.25682857632637024, "rewards/margins": -0.006322940811514854, "rewards/rejected": -0.25050562620162964, "step": 20 }, { "epoch": 0.057494866529774126, "grad_norm": 2.9118804931640625, "learning_rate": 9.971232876712328e-07, "log_odds_chosen": 0.7815825343132019, "log_odds_ratio": -0.494041383266449, "logits/chosen": -0.8875564336776733, "logits/rejected": -0.9611893892288208, "logps/chosen": -2.2235820293426514, "logps/rejected": -2.9299683570861816, "loss": 2.4961, "nll_loss": 2.4467294216156006, "rewards/accuracies": 0.75, "rewards/chosen": -0.22235821187496185, "rewards/margins": 0.07063861191272736, "rewards/rejected": -0.2929968237876892, "step": 21 }, { "epoch": 0.06023271731690623, "grad_norm": 3.172863721847534, "learning_rate": 9.96986301369863e-07, "log_odds_chosen": 0.21546898782253265, "log_odds_ratio": -0.8715327978134155, "logits/chosen": -1.1187394857406616, "logits/rejected": -1.065380573272705, "logps/chosen": -2.9908175468444824, "logps/rejected": -3.1940667629241943, "loss": 2.5858, "nll_loss": 2.4986939430236816, "rewards/accuracies": 0.5, "rewards/chosen": -0.2990817725658417, "rewards/margins": 0.020324893295764923, "rewards/rejected": -0.319406658411026, "step": 22 }, { "epoch": 0.06297056810403832, "grad_norm": 3.3559956550598145, "learning_rate": 9.968493150684932e-07, "log_odds_chosen": -0.24320575594902039, "log_odds_ratio": -0.9749249219894409, "logits/chosen": -1.004804015159607, "logits/rejected": -1.036738634109497, "logps/chosen": -3.2607431411743164, "logps/rejected": -2.9983439445495605, "loss": 2.5814, "nll_loss": 2.4838647842407227, "rewards/accuracies": 0.5, "rewards/chosen": -0.3260743021965027, "rewards/margins": -0.026239924132823944, "rewards/rejected": -0.29983440041542053, "step": 23 }, { "epoch": 0.06570841889117043, "grad_norm": 3.046931028366089, "learning_rate": 9.967123287671232e-07, "log_odds_chosen": -0.2152610868215561, "log_odds_ratio": -0.953547477722168, "logits/chosen": -0.9757410287857056, "logits/rejected": -0.9575390219688416, "logps/chosen": -3.147953510284424, "logps/rejected": -2.912590980529785, "loss": 2.5352, "nll_loss": 2.439873695373535, "rewards/accuracies": 0.5, "rewards/chosen": -0.3147953748703003, "rewards/margins": -0.023536263033747673, "rewards/rejected": -0.29125910997390747, "step": 24 }, { "epoch": 0.06844626967830253, "grad_norm": 3.3836586475372314, "learning_rate": 9.965753424657534e-07, "log_odds_chosen": -0.5600754618644714, "log_odds_ratio": -1.2587569952011108, "logits/chosen": -1.0444267988204956, "logits/rejected": -1.0171830654144287, "logps/chosen": -3.152388334274292, "logps/rejected": -2.5578300952911377, "loss": 2.764, "nll_loss": 2.6380772590637207, "rewards/accuracies": 0.625, "rewards/chosen": -0.3152388334274292, "rewards/margins": -0.05945581570267677, "rewards/rejected": -0.2557830214500427, "step": 25 }, { "epoch": 0.07118412046543464, "grad_norm": 2.947374105453491, "learning_rate": 9.964383561643836e-07, "log_odds_chosen": 0.28650134801864624, "log_odds_ratio": -0.6512345671653748, "logits/chosen": -0.9169235229492188, "logits/rejected": -0.9963619709014893, "logps/chosen": -2.7883646488189697, "logps/rejected": -2.9986307621002197, "loss": 2.4941, "nll_loss": 2.4290010929107666, "rewards/accuracies": 0.75, "rewards/chosen": -0.2788364589214325, "rewards/margins": 0.02102663740515709, "rewards/rejected": -0.2998631000518799, "step": 26 }, { "epoch": 0.07392197125256673, "grad_norm": 3.013309955596924, "learning_rate": 9.963013698630136e-07, "log_odds_chosen": -0.23637942969799042, "log_odds_ratio": -0.9623304605484009, "logits/chosen": -0.9564924240112305, "logits/rejected": -1.021548867225647, "logps/chosen": -3.2541375160217285, "logps/rejected": -2.9791014194488525, "loss": 2.5074, "nll_loss": 2.411170244216919, "rewards/accuracies": 0.625, "rewards/chosen": -0.3254137337207794, "rewards/margins": -0.027503617107868195, "rewards/rejected": -0.2979101538658142, "step": 27 }, { "epoch": 0.07665982203969883, "grad_norm": 3.4510834217071533, "learning_rate": 9.961643835616438e-07, "log_odds_chosen": -0.3438650965690613, "log_odds_ratio": -1.1621259450912476, "logits/chosen": -0.8955706357955933, "logits/rejected": -0.9022989273071289, "logps/chosen": -3.5300941467285156, "logps/rejected": -3.172628879547119, "loss": 2.6415, "nll_loss": 2.525275707244873, "rewards/accuracies": 0.375, "rewards/chosen": -0.3530094027519226, "rewards/margins": -0.0357464998960495, "rewards/rejected": -0.3172629177570343, "step": 28 }, { "epoch": 0.07939767282683094, "grad_norm": 3.6896843910217285, "learning_rate": 9.96027397260274e-07, "log_odds_chosen": -1.218933343887329, "log_odds_ratio": -1.6911286115646362, "logits/chosen": -0.9412728548049927, "logits/rejected": -0.8919946551322937, "logps/chosen": -4.091635227203369, "logps/rejected": -2.866482973098755, "loss": 2.8595, "nll_loss": 2.690384864807129, "rewards/accuracies": 0.25, "rewards/chosen": -0.40916356444358826, "rewards/margins": -0.12251526117324829, "rewards/rejected": -0.28664830327033997, "step": 29 }, { "epoch": 0.08213552361396304, "grad_norm": 3.5185229778289795, "learning_rate": 9.95890410958904e-07, "log_odds_chosen": -1.5356273651123047, "log_odds_ratio": -2.1136040687561035, "logits/chosen": -0.9350854158401489, "logits/rejected": -0.8856633901596069, "logps/chosen": -4.274277210235596, "logps/rejected": -2.726356267929077, "loss": 2.7865, "nll_loss": 2.5751452445983887, "rewards/accuracies": 0.375, "rewards/chosen": -0.4274277091026306, "rewards/margins": -0.15479210019111633, "rewards/rejected": -0.2726356089115143, "step": 30 }, { "epoch": 0.08487337440109514, "grad_norm": 3.8082239627838135, "learning_rate": 9.957534246575342e-07, "log_odds_chosen": -1.1255635023117065, "log_odds_ratio": -1.5257725715637207, "logits/chosen": -0.9883882999420166, "logits/rejected": -0.927241325378418, "logps/chosen": -3.9310293197631836, "logps/rejected": -2.8216962814331055, "loss": 2.7588, "nll_loss": 2.6062679290771484, "rewards/accuracies": 0.25, "rewards/chosen": -0.3931029438972473, "rewards/margins": -0.11093328893184662, "rewards/rejected": -0.2821696400642395, "step": 31 }, { "epoch": 0.08761122518822724, "grad_norm": 3.581867218017578, "learning_rate": 9.956164383561644e-07, "log_odds_chosen": -0.040575265884399414, "log_odds_ratio": -1.7848801612854004, "logits/chosen": -0.98943692445755, "logits/rejected": -1.052252173423767, "logps/chosen": -3.982788562774658, "logps/rejected": -3.886491298675537, "loss": 2.6816, "nll_loss": 2.5030906200408936, "rewards/accuracies": 0.375, "rewards/chosen": -0.3982788324356079, "rewards/margins": -0.009629704058170319, "rewards/rejected": -0.3886491656303406, "step": 32 }, { "epoch": 0.09034907597535935, "grad_norm": 3.0988357067108154, "learning_rate": 9.954794520547944e-07, "log_odds_chosen": -0.12104825675487518, "log_odds_ratio": -0.8169374465942383, "logits/chosen": -0.9245313405990601, "logits/rejected": -0.9615892767906189, "logps/chosen": -2.7275900840759277, "logps/rejected": -2.5707623958587646, "loss": 2.5106, "nll_loss": 2.4288690090179443, "rewards/accuracies": 0.625, "rewards/chosen": -0.27275902032852173, "rewards/margins": -0.015682779252529144, "rewards/rejected": -0.257076233625412, "step": 33 }, { "epoch": 0.09308692676249145, "grad_norm": 3.0455188751220703, "learning_rate": 9.953424657534246e-07, "log_odds_chosen": -0.21330764889717102, "log_odds_ratio": -1.0050015449523926, "logits/chosen": -0.8763282299041748, "logits/rejected": -0.9322528839111328, "logps/chosen": -3.405508518218994, "logps/rejected": -3.148069381713867, "loss": 2.5411, "nll_loss": 2.4406449794769287, "rewards/accuracies": 0.75, "rewards/chosen": -0.34055083990097046, "rewards/margins": -0.025743916630744934, "rewards/rejected": -0.3148069381713867, "step": 34 }, { "epoch": 0.09582477754962354, "grad_norm": 3.095365047454834, "learning_rate": 9.952054794520548e-07, "log_odds_chosen": -0.456852525472641, "log_odds_ratio": -1.1504400968551636, "logits/chosen": -1.037177324295044, "logits/rejected": -1.099656343460083, "logps/chosen": -3.229046583175659, "logps/rejected": -2.7405736446380615, "loss": 2.476, "nll_loss": 2.3609859943389893, "rewards/accuracies": 0.375, "rewards/chosen": -0.32290467619895935, "rewards/margins": -0.04884728789329529, "rewards/rejected": -0.27405738830566406, "step": 35 }, { "epoch": 0.09856262833675565, "grad_norm": 3.446467161178589, "learning_rate": 9.950684931506848e-07, "log_odds_chosen": -0.9263155460357666, "log_odds_ratio": -1.3581264019012451, "logits/chosen": -1.0543384552001953, "logits/rejected": -0.9880855083465576, "logps/chosen": -3.253249168395996, "logps/rejected": -2.3627512454986572, "loss": 2.7167, "nll_loss": 2.580902576446533, "rewards/accuracies": 0.125, "rewards/chosen": -0.3253249228000641, "rewards/margins": -0.08904977887868881, "rewards/rejected": -0.2362751066684723, "step": 36 }, { "epoch": 0.10130047912388775, "grad_norm": 3.0595040321350098, "learning_rate": 9.94931506849315e-07, "log_odds_chosen": 0.36326226592063904, "log_odds_ratio": -0.6339438557624817, "logits/chosen": -0.9270715713500977, "logits/rejected": -0.9281109571456909, "logps/chosen": -2.6123485565185547, "logps/rejected": -2.895297050476074, "loss": 2.423, "nll_loss": 2.3595988750457764, "rewards/accuracies": 0.75, "rewards/chosen": -0.261234849691391, "rewards/margins": 0.028294850140810013, "rewards/rejected": -0.2895297110080719, "step": 37 }, { "epoch": 0.10403832991101986, "grad_norm": 3.221231698989868, "learning_rate": 9.947945205479452e-07, "log_odds_chosen": -0.43637141585350037, "log_odds_ratio": -1.136945128440857, "logits/chosen": -0.9251302480697632, "logits/rejected": -0.9151610136032104, "logps/chosen": -3.360614061355591, "logps/rejected": -2.8774147033691406, "loss": 2.5592, "nll_loss": 2.4455490112304688, "rewards/accuracies": 0.5, "rewards/chosen": -0.33606138825416565, "rewards/margins": -0.04831992834806442, "rewards/rejected": -0.287741482257843, "step": 38 }, { "epoch": 0.10677618069815195, "grad_norm": 3.202396869659424, "learning_rate": 9.946575342465752e-07, "log_odds_chosen": 0.28445175290107727, "log_odds_ratio": -0.6425720453262329, "logits/chosen": -1.0692996978759766, "logits/rejected": -1.054713487625122, "logps/chosen": -3.0059075355529785, "logps/rejected": -3.286362648010254, "loss": 2.6259, "nll_loss": 2.5616607666015625, "rewards/accuracies": 0.5, "rewards/chosen": -0.30059075355529785, "rewards/margins": 0.028045518323779106, "rewards/rejected": -0.3286362886428833, "step": 39 }, { "epoch": 0.10951403148528405, "grad_norm": 3.503584146499634, "learning_rate": 9.945205479452054e-07, "log_odds_chosen": -0.19220398366451263, "log_odds_ratio": -0.9919863939285278, "logits/chosen": -1.0240229368209839, "logits/rejected": -1.0615441799163818, "logps/chosen": -3.4710209369659424, "logps/rejected": -3.237278938293457, "loss": 2.6767, "nll_loss": 2.577547788619995, "rewards/accuracies": 0.625, "rewards/chosen": -0.3471021056175232, "rewards/margins": -0.023374173790216446, "rewards/rejected": -0.32372793555259705, "step": 40 }, { "epoch": 0.11225188227241616, "grad_norm": 3.2584927082061768, "learning_rate": 9.943835616438356e-07, "log_odds_chosen": -1.0145578384399414, "log_odds_ratio": -1.5280964374542236, "logits/chosen": -0.954757809638977, "logits/rejected": -0.977424681186676, "logps/chosen": -3.4694790840148926, "logps/rejected": -2.454672336578369, "loss": 2.5476, "nll_loss": 2.3948333263397217, "rewards/accuracies": 0.375, "rewards/chosen": -0.34694790840148926, "rewards/margins": -0.10148066282272339, "rewards/rejected": -0.24546727538108826, "step": 41 }, { "epoch": 0.11498973305954825, "grad_norm": 3.0581846237182617, "learning_rate": 9.942465753424656e-07, "log_odds_chosen": 0.7696634531021118, "log_odds_ratio": -0.49970296025276184, "logits/chosen": -0.93128502368927, "logits/rejected": -0.9841890335083008, "logps/chosen": -2.7720816135406494, "logps/rejected": -3.4907822608947754, "loss": 2.5128, "nll_loss": 2.462815523147583, "rewards/accuracies": 0.75, "rewards/chosen": -0.2772081792354584, "rewards/margins": 0.07187005132436752, "rewards/rejected": -0.3490782380104065, "step": 42 }, { "epoch": 0.11772758384668036, "grad_norm": 3.233919382095337, "learning_rate": 9.941095890410958e-07, "log_odds_chosen": -1.2374681234359741, "log_odds_ratio": -1.7985851764678955, "logits/chosen": -1.1468861103057861, "logits/rejected": -1.1390553712844849, "logps/chosen": -3.750523328781128, "logps/rejected": -2.5257205963134766, "loss": 2.6475, "nll_loss": 2.4676456451416016, "rewards/accuracies": 0.375, "rewards/chosen": -0.3750523626804352, "rewards/margins": -0.12248028814792633, "rewards/rejected": -0.25257205963134766, "step": 43 }, { "epoch": 0.12046543463381246, "grad_norm": 3.7626194953918457, "learning_rate": 9.93972602739726e-07, "log_odds_chosen": -0.8812930583953857, "log_odds_ratio": -1.902374267578125, "logits/chosen": -0.9840712547302246, "logits/rejected": -0.9485246539115906, "logps/chosen": -4.418756484985352, "logps/rejected": -3.528292179107666, "loss": 2.7358, "nll_loss": 2.5455667972564697, "rewards/accuracies": 0.375, "rewards/chosen": -0.4418756067752838, "rewards/margins": -0.08904638886451721, "rewards/rejected": -0.3528292179107666, "step": 44 }, { "epoch": 0.12320328542094455, "grad_norm": 3.7497665882110596, "learning_rate": 9.938356164383562e-07, "log_odds_chosen": -1.218980073928833, "log_odds_ratio": -1.593153715133667, "logits/chosen": -1.0033208131790161, "logits/rejected": -0.9544165730476379, "logps/chosen": -3.7686028480529785, "logps/rejected": -2.5972325801849365, "loss": 2.7122, "nll_loss": 2.552868127822876, "rewards/accuracies": 0.125, "rewards/chosen": -0.37686029076576233, "rewards/margins": -0.11713705211877823, "rewards/rejected": -0.2597232460975647, "step": 45 }, { "epoch": 0.12594113620807665, "grad_norm": 3.226763963699341, "learning_rate": 9.936986301369862e-07, "log_odds_chosen": -0.7820941209793091, "log_odds_ratio": -1.3462257385253906, "logits/chosen": -0.9973956942558289, "logits/rejected": -1.0131598711013794, "logps/chosen": -3.527803421020508, "logps/rejected": -2.762380361557007, "loss": 2.6591, "nll_loss": 2.524507999420166, "rewards/accuracies": 0.25, "rewards/chosen": -0.352780282497406, "rewards/margins": -0.07654227316379547, "rewards/rejected": -0.27623802423477173, "step": 46 }, { "epoch": 0.12867898699520877, "grad_norm": 3.4998631477355957, "learning_rate": 9.935616438356164e-07, "log_odds_chosen": -0.6931473612785339, "log_odds_ratio": -1.1699886322021484, "logits/chosen": -1.024360179901123, "logits/rejected": -0.9915997982025146, "logps/chosen": -2.6576194763183594, "logps/rejected": -2.048466682434082, "loss": 2.6549, "nll_loss": 2.537935256958008, "rewards/accuracies": 0.25, "rewards/chosen": -0.26576194167137146, "rewards/margins": -0.06091528385877609, "rewards/rejected": -0.20484665036201477, "step": 47 }, { "epoch": 0.13141683778234087, "grad_norm": 3.1999990940093994, "learning_rate": 9.934246575342466e-07, "log_odds_chosen": -0.4278365969657898, "log_odds_ratio": -1.2820472717285156, "logits/chosen": -0.9717689752578735, "logits/rejected": -0.9443994760513306, "logps/chosen": -3.190329074859619, "logps/rejected": -2.7494959831237793, "loss": 2.5757, "nll_loss": 2.4475417137145996, "rewards/accuracies": 0.5, "rewards/chosen": -0.3190329074859619, "rewards/margins": -0.044083308428525925, "rewards/rejected": -0.2749496400356293, "step": 48 }, { "epoch": 0.13415468856947296, "grad_norm": 3.4573261737823486, "learning_rate": 9.932876712328766e-07, "log_odds_chosen": 0.6227337121963501, "log_odds_ratio": -0.6796971559524536, "logits/chosen": -1.0109398365020752, "logits/rejected": -1.0079877376556396, "logps/chosen": -3.1388940811157227, "logps/rejected": -3.7235724925994873, "loss": 2.6504, "nll_loss": 2.582411050796509, "rewards/accuracies": 0.5, "rewards/chosen": -0.31388938426971436, "rewards/margins": 0.05846785008907318, "rewards/rejected": -0.37235724925994873, "step": 49 }, { "epoch": 0.13689253935660506, "grad_norm": 3.792154312133789, "learning_rate": 9.931506849315068e-07, "log_odds_chosen": -0.5093178153038025, "log_odds_ratio": -1.1828991174697876, "logits/chosen": -1.000579595565796, "logits/rejected": -0.9469153881072998, "logps/chosen": -3.018587112426758, "logps/rejected": -2.496516227722168, "loss": 2.5994, "nll_loss": 2.4811248779296875, "rewards/accuracies": 0.5, "rewards/chosen": -0.30185869336128235, "rewards/margins": -0.05220707505941391, "rewards/rejected": -0.24965164065361023, "step": 50 }, { "epoch": 0.13963039014373715, "grad_norm": 3.1954898834228516, "learning_rate": 9.93013698630137e-07, "log_odds_chosen": 0.05889061838388443, "log_odds_ratio": -0.7507599592208862, "logits/chosen": -0.9227021932601929, "logits/rejected": -0.9443502426147461, "logps/chosen": -2.974925994873047, "logps/rejected": -3.007483720779419, "loss": 2.5933, "nll_loss": 2.518272876739502, "rewards/accuracies": 0.625, "rewards/chosen": -0.2974925935268402, "rewards/margins": 0.0032557863742113113, "rewards/rejected": -0.30074837803840637, "step": 51 }, { "epoch": 0.14236824093086928, "grad_norm": 3.947071075439453, "learning_rate": 9.928767123287672e-07, "log_odds_chosen": -1.7422120571136475, "log_odds_ratio": -2.30293869972229, "logits/chosen": -0.9698164463043213, "logits/rejected": -1.0295350551605225, "logps/chosen": -4.519162178039551, "logps/rejected": -2.8055665493011475, "loss": 2.7832, "nll_loss": 2.552924871444702, "rewards/accuracies": 0.125, "rewards/chosen": -0.4519162178039551, "rewards/margins": -0.17135955393314362, "rewards/rejected": -0.28055667877197266, "step": 52 }, { "epoch": 0.14510609171800137, "grad_norm": 3.2610883712768555, "learning_rate": 9.927397260273972e-07, "log_odds_chosen": 0.6452324390411377, "log_odds_ratio": -0.9608546495437622, "logits/chosen": -1.080498456954956, "logits/rejected": -1.1192996501922607, "logps/chosen": -2.9264631271362305, "logps/rejected": -3.57364559173584, "loss": 2.661, "nll_loss": 2.564880847930908, "rewards/accuracies": 0.375, "rewards/chosen": -0.2926463484764099, "rewards/margins": 0.06471820175647736, "rewards/rejected": -0.3573645353317261, "step": 53 }, { "epoch": 0.14784394250513347, "grad_norm": 3.767521381378174, "learning_rate": 9.926027397260274e-07, "log_odds_chosen": -0.577194333076477, "log_odds_ratio": -1.248244285583496, "logits/chosen": -1.0034810304641724, "logits/rejected": -0.984115481376648, "logps/chosen": -4.074385643005371, "logps/rejected": -3.500664710998535, "loss": 2.7493, "nll_loss": 2.624459743499756, "rewards/accuracies": 0.25, "rewards/chosen": -0.4074385166168213, "rewards/margins": -0.0573720708489418, "rewards/rejected": -0.35006648302078247, "step": 54 }, { "epoch": 0.15058179329226556, "grad_norm": 3.063957691192627, "learning_rate": 9.924657534246574e-07, "log_odds_chosen": 1.1883480548858643, "log_odds_ratio": -0.6104085445404053, "logits/chosen": -1.0940487384796143, "logits/rejected": -1.1828625202178955, "logps/chosen": -2.4768946170806885, "logps/rejected": -3.579986095428467, "loss": 2.4428, "nll_loss": 2.381709337234497, "rewards/accuracies": 0.625, "rewards/chosen": -0.24768944084644318, "rewards/margins": 0.1103091612458229, "rewards/rejected": -0.3579986095428467, "step": 55 }, { "epoch": 0.15331964407939766, "grad_norm": 3.3439013957977295, "learning_rate": 9.923287671232876e-07, "log_odds_chosen": -0.6707841157913208, "log_odds_ratio": -1.3016283512115479, "logits/chosen": -1.0047497749328613, "logits/rejected": -1.12501060962677, "logps/chosen": -3.102134943008423, "logps/rejected": -2.4253273010253906, "loss": 2.5162, "nll_loss": 2.3860220909118652, "rewards/accuracies": 0.5, "rewards/chosen": -0.31021350622177124, "rewards/margins": -0.06768076866865158, "rewards/rejected": -0.24253273010253906, "step": 56 }, { "epoch": 0.15605749486652978, "grad_norm": 3.5855698585510254, "learning_rate": 9.921917808219178e-07, "log_odds_chosen": -0.4335431754589081, "log_odds_ratio": -1.008169174194336, "logits/chosen": -1.0126633644104004, "logits/rejected": -0.9728351831436157, "logps/chosen": -3.2679576873779297, "logps/rejected": -2.847505569458008, "loss": 2.6352, "nll_loss": 2.5344038009643555, "rewards/accuracies": 0.375, "rewards/chosen": -0.326795756816864, "rewards/margins": -0.042045220732688904, "rewards/rejected": -0.2847505509853363, "step": 57 }, { "epoch": 0.15879534565366188, "grad_norm": 3.228300094604492, "learning_rate": 9.920547945205478e-07, "log_odds_chosen": 1.2199230194091797, "log_odds_ratio": -0.6324396729469299, "logits/chosen": -0.9661234617233276, "logits/rejected": -1.0002673864364624, "logps/chosen": -2.4717791080474854, "logps/rejected": -3.5827107429504395, "loss": 2.4833, "nll_loss": 2.4200594425201416, "rewards/accuracies": 0.625, "rewards/chosen": -0.24717789888381958, "rewards/margins": 0.1110931783914566, "rewards/rejected": -0.3582710921764374, "step": 58 }, { "epoch": 0.16153319644079397, "grad_norm": 3.406118631362915, "learning_rate": 9.91917808219178e-07, "log_odds_chosen": -0.9762267470359802, "log_odds_ratio": -1.4436544179916382, "logits/chosen": -0.9930115342140198, "logits/rejected": -0.9768744111061096, "logps/chosen": -3.519002914428711, "logps/rejected": -2.5642905235290527, "loss": 2.7181, "nll_loss": 2.573711633682251, "rewards/accuracies": 0.375, "rewards/chosen": -0.3519003093242645, "rewards/margins": -0.09547124058008194, "rewards/rejected": -0.2564290761947632, "step": 59 }, { "epoch": 0.16427104722792607, "grad_norm": 3.1104161739349365, "learning_rate": 9.917808219178082e-07, "log_odds_chosen": 0.69125896692276, "log_odds_ratio": -0.5959937572479248, "logits/chosen": -0.9047829508781433, "logits/rejected": -0.9430214166641235, "logps/chosen": -2.8033130168914795, "logps/rejected": -3.429898262023926, "loss": 2.5134, "nll_loss": 2.453794479370117, "rewards/accuracies": 0.75, "rewards/chosen": -0.2803313136100769, "rewards/margins": 0.06265853345394135, "rewards/rejected": -0.34298986196517944, "step": 60 }, { "epoch": 0.16700889801505817, "grad_norm": 3.624727725982666, "learning_rate": 9.916438356164382e-07, "log_odds_chosen": -0.7407705783843994, "log_odds_ratio": -1.3303425312042236, "logits/chosen": -1.0508767366409302, "logits/rejected": -1.0694258213043213, "logps/chosen": -3.614408254623413, "logps/rejected": -2.8874809741973877, "loss": 2.7029, "nll_loss": 2.569838523864746, "rewards/accuracies": 0.375, "rewards/chosen": -0.36144083738327026, "rewards/margins": -0.07269272208213806, "rewards/rejected": -0.2887481153011322, "step": 61 }, { "epoch": 0.1697467488021903, "grad_norm": 3.1116116046905518, "learning_rate": 9.915068493150684e-07, "log_odds_chosen": -0.2722044885158539, "log_odds_ratio": -0.9546802639961243, "logits/chosen": -1.0614721775054932, "logits/rejected": -1.093877911567688, "logps/chosen": -2.5293617248535156, "logps/rejected": -2.26615571975708, "loss": 2.4642, "nll_loss": 2.36869478225708, "rewards/accuracies": 0.5, "rewards/chosen": -0.2529361844062805, "rewards/margins": -0.026320625096559525, "rewards/rejected": -0.22661557793617249, "step": 62 }, { "epoch": 0.17248459958932238, "grad_norm": 3.519420623779297, "learning_rate": 9.913698630136986e-07, "log_odds_chosen": -0.269711971282959, "log_odds_ratio": -1.0890251398086548, "logits/chosen": -1.0771327018737793, "logits/rejected": -1.0870440006256104, "logps/chosen": -3.4009757041931152, "logps/rejected": -3.079171657562256, "loss": 2.681, "nll_loss": 2.5721359252929688, "rewards/accuracies": 0.625, "rewards/chosen": -0.3400976061820984, "rewards/margins": -0.032180432230234146, "rewards/rejected": -0.30791717767715454, "step": 63 }, { "epoch": 0.17522245037645448, "grad_norm": 3.5176796913146973, "learning_rate": 9.912328767123286e-07, "log_odds_chosen": -0.5459843873977661, "log_odds_ratio": -1.1888675689697266, "logits/chosen": -0.978266716003418, "logits/rejected": -0.9882693290710449, "logps/chosen": -3.1501870155334473, "logps/rejected": -2.586683750152588, "loss": 2.6581, "nll_loss": 2.539203405380249, "rewards/accuracies": 0.25, "rewards/chosen": -0.3150186836719513, "rewards/margins": -0.05635031312704086, "rewards/rejected": -0.25866836309432983, "step": 64 }, { "epoch": 0.17796030116358658, "grad_norm": 3.2566869258880615, "learning_rate": 9.910958904109588e-07, "log_odds_chosen": -0.34792038798332214, "log_odds_ratio": -1.077898621559143, "logits/chosen": -1.0149846076965332, "logits/rejected": -1.0502235889434814, "logps/chosen": -2.768282651901245, "logps/rejected": -2.3941287994384766, "loss": 2.5041, "nll_loss": 2.3963558673858643, "rewards/accuracies": 0.625, "rewards/chosen": -0.2768282890319824, "rewards/margins": -0.037415385246276855, "rewards/rejected": -0.23941290378570557, "step": 65 }, { "epoch": 0.1806981519507187, "grad_norm": 3.50449275970459, "learning_rate": 9.90958904109589e-07, "log_odds_chosen": 0.2006177008152008, "log_odds_ratio": -0.7724001407623291, "logits/chosen": -0.9652442932128906, "logits/rejected": -0.9444387555122375, "logps/chosen": -2.8230643272399902, "logps/rejected": -2.995136260986328, "loss": 2.6136, "nll_loss": 2.5363259315490723, "rewards/accuracies": 0.375, "rewards/chosen": -0.2823064625263214, "rewards/margins": 0.01720719039440155, "rewards/rejected": -0.29951363801956177, "step": 66 }, { "epoch": 0.1834360027378508, "grad_norm": 3.540684223175049, "learning_rate": 9.90821917808219e-07, "log_odds_chosen": -1.00186288356781, "log_odds_ratio": -1.5719554424285889, "logits/chosen": -1.028850793838501, "logits/rejected": -0.9999632835388184, "logps/chosen": -3.6031956672668457, "logps/rejected": -2.582226037979126, "loss": 2.7763, "nll_loss": 2.619086503982544, "rewards/accuracies": 0.375, "rewards/chosen": -0.3603195548057556, "rewards/margins": -0.10209696739912033, "rewards/rejected": -0.2582225799560547, "step": 67 }, { "epoch": 0.1861738535249829, "grad_norm": 3.1822686195373535, "learning_rate": 9.906849315068492e-07, "log_odds_chosen": -0.2103220522403717, "log_odds_ratio": -0.9029016494750977, "logits/chosen": -0.938464343547821, "logits/rejected": -0.9328216314315796, "logps/chosen": -2.6127495765686035, "logps/rejected": -2.390486240386963, "loss": 2.4947, "nll_loss": 2.4043681621551514, "rewards/accuracies": 0.375, "rewards/chosen": -0.26127493381500244, "rewards/margins": -0.022226322442293167, "rewards/rejected": -0.23904862999916077, "step": 68 }, { "epoch": 0.188911704312115, "grad_norm": 3.5215094089508057, "learning_rate": 9.905479452054794e-07, "log_odds_chosen": -0.11547226458787918, "log_odds_ratio": -0.841079831123352, "logits/chosen": -1.0478079319000244, "logits/rejected": -1.0728495121002197, "logps/chosen": -2.8842806816101074, "logps/rejected": -2.752790927886963, "loss": 2.611, "nll_loss": 2.5269293785095215, "rewards/accuracies": 0.375, "rewards/chosen": -0.28842809796333313, "rewards/margins": -0.01314898394048214, "rewards/rejected": -0.27527910470962524, "step": 69 }, { "epoch": 0.19164955509924708, "grad_norm": 3.1444272994995117, "learning_rate": 9.904109589041094e-07, "log_odds_chosen": 0.3853911757469177, "log_odds_ratio": -0.7761666774749756, "logits/chosen": -0.8496524691581726, "logits/rejected": -0.9935747981071472, "logps/chosen": -2.169492244720459, "logps/rejected": -2.4410715103149414, "loss": 2.3573, "nll_loss": 2.2796378135681152, "rewards/accuracies": 0.75, "rewards/chosen": -0.2169492393732071, "rewards/margins": 0.027157926931977272, "rewards/rejected": -0.2441071718931198, "step": 70 }, { "epoch": 0.1943874058863792, "grad_norm": 2.885634183883667, "learning_rate": 9.902739726027396e-07, "log_odds_chosen": 0.8262771964073181, "log_odds_ratio": -0.5517247915267944, "logits/chosen": -0.888378381729126, "logits/rejected": -0.9597537517547607, "logps/chosen": -1.9005041122436523, "logps/rejected": -2.6531295776367188, "loss": 2.3761, "nll_loss": 2.3209564685821533, "rewards/accuracies": 0.625, "rewards/chosen": -0.1900504231452942, "rewards/margins": 0.07526254653930664, "rewards/rejected": -0.26531296968460083, "step": 71 }, { "epoch": 0.1971252566735113, "grad_norm": 3.7908854484558105, "learning_rate": 9.901369863013698e-07, "log_odds_chosen": -0.07297167181968689, "log_odds_ratio": -1.164433240890503, "logits/chosen": -1.0600478649139404, "logits/rejected": -1.0254573822021484, "logps/chosen": -3.2665457725524902, "logps/rejected": -3.1761152744293213, "loss": 2.6059, "nll_loss": 2.489471912384033, "rewards/accuracies": 0.625, "rewards/chosen": -0.3266545832157135, "rewards/margins": -0.00904306024312973, "rewards/rejected": -0.3176115155220032, "step": 72 }, { "epoch": 0.1998631074606434, "grad_norm": 3.377512216567993, "learning_rate": 9.9e-07, "log_odds_chosen": -0.3949001729488373, "log_odds_ratio": -1.0756080150604248, "logits/chosen": -0.9518582224845886, "logits/rejected": -0.9649754762649536, "logps/chosen": -3.369264841079712, "logps/rejected": -2.9303643703460693, "loss": 2.543, "nll_loss": 2.435479164123535, "rewards/accuracies": 0.5, "rewards/chosen": -0.33692649006843567, "rewards/margins": -0.04389005899429321, "rewards/rejected": -0.29303640127182007, "step": 73 }, { "epoch": 0.2026009582477755, "grad_norm": 3.0144596099853516, "learning_rate": 9.8986301369863e-07, "log_odds_chosen": 0.13420513272285461, "log_odds_ratio": -0.7166808247566223, "logits/chosen": -1.0175421237945557, "logits/rejected": -1.0335230827331543, "logps/chosen": -2.4380221366882324, "logps/rejected": -2.5373125076293945, "loss": 2.4215, "nll_loss": 2.3498473167419434, "rewards/accuracies": 0.75, "rewards/chosen": -0.2438022345304489, "rewards/margins": 0.009929012507200241, "rewards/rejected": -0.25373125076293945, "step": 74 }, { "epoch": 0.2053388090349076, "grad_norm": 3.6281495094299316, "learning_rate": 9.897260273972602e-07, "log_odds_chosen": -0.5922812223434448, "log_odds_ratio": -1.240749716758728, "logits/chosen": -0.9770914316177368, "logits/rejected": -0.9620495438575745, "logps/chosen": -3.5886237621307373, "logps/rejected": -2.9903130531311035, "loss": 2.7889, "nll_loss": 2.6647982597351074, "rewards/accuracies": 0.375, "rewards/chosen": -0.35886240005493164, "rewards/margins": -0.059831079095602036, "rewards/rejected": -0.2990313172340393, "step": 75 }, { "epoch": 0.2080766598220397, "grad_norm": 3.433006525039673, "learning_rate": 9.895890410958905e-07, "log_odds_chosen": -0.25794118642807007, "log_odds_ratio": -1.091044545173645, "logits/chosen": -0.9544941186904907, "logits/rejected": -0.9791043400764465, "logps/chosen": -3.3994128704071045, "logps/rejected": -3.1053779125213623, "loss": 2.5789, "nll_loss": 2.4698376655578613, "rewards/accuracies": 0.375, "rewards/chosen": -0.3399412930011749, "rewards/margins": -0.02940351702272892, "rewards/rejected": -0.31053778529167175, "step": 76 }, { "epoch": 0.2108145106091718, "grad_norm": 3.5377037525177, "learning_rate": 9.894520547945204e-07, "log_odds_chosen": 0.11093243956565857, "log_odds_ratio": -0.7584233283996582, "logits/chosen": -0.9502558708190918, "logits/rejected": -0.9302322864532471, "logps/chosen": -3.0108048915863037, "logps/rejected": -3.1044230461120605, "loss": 2.6223, "nll_loss": 2.5464673042297363, "rewards/accuracies": 0.625, "rewards/chosen": -0.30108049511909485, "rewards/margins": 0.009361827746033669, "rewards/rejected": -0.31044232845306396, "step": 77 }, { "epoch": 0.2135523613963039, "grad_norm": 3.161001205444336, "learning_rate": 9.893150684931507e-07, "log_odds_chosen": 0.32682162523269653, "log_odds_ratio": -0.6835559606552124, "logits/chosen": -0.892565906047821, "logits/rejected": -0.9868108630180359, "logps/chosen": -2.3559396266937256, "logps/rejected": -2.647196054458618, "loss": 2.4776, "nll_loss": 2.4092929363250732, "rewards/accuracies": 0.75, "rewards/chosen": -0.23559395968914032, "rewards/margins": 0.029125651344656944, "rewards/rejected": -0.2647196054458618, "step": 78 }, { "epoch": 0.216290212183436, "grad_norm": 3.619816780090332, "learning_rate": 9.891780821917809e-07, "log_odds_chosen": -0.18952032923698425, "log_odds_ratio": -0.879361629486084, "logits/chosen": -1.0641273260116577, "logits/rejected": -1.0367836952209473, "logps/chosen": -3.3640027046203613, "logps/rejected": -3.164318084716797, "loss": 2.6343, "nll_loss": 2.546405792236328, "rewards/accuracies": 0.5, "rewards/chosen": -0.33640027046203613, "rewards/margins": -0.019968463107943535, "rewards/rejected": -0.31643179059028625, "step": 79 }, { "epoch": 0.2190280629705681, "grad_norm": 3.8388514518737793, "learning_rate": 9.89041095890411e-07, "log_odds_chosen": -1.0518678426742554, "log_odds_ratio": -1.5971544981002808, "logits/chosen": -1.0720750093460083, "logits/rejected": -1.080270528793335, "logps/chosen": -3.621541738510132, "logps/rejected": -2.5706586837768555, "loss": 2.7237, "nll_loss": 2.563952922821045, "rewards/accuracies": 0.375, "rewards/chosen": -0.36215418577194214, "rewards/margins": -0.10508830100297928, "rewards/rejected": -0.25706589221954346, "step": 80 }, { "epoch": 0.22176591375770022, "grad_norm": 3.4853856563568115, "learning_rate": 9.88904109589041e-07, "log_odds_chosen": 0.03704455494880676, "log_odds_ratio": -1.0021986961364746, "logits/chosen": -1.1259570121765137, "logits/rejected": -1.0584707260131836, "logps/chosen": -2.8743579387664795, "logps/rejected": -2.9206666946411133, "loss": 2.669, "nll_loss": 2.5687451362609863, "rewards/accuracies": 0.25, "rewards/chosen": -0.2874358296394348, "rewards/margins": 0.004630880430340767, "rewards/rejected": -0.29206669330596924, "step": 81 }, { "epoch": 0.2245037645448323, "grad_norm": 3.667823076248169, "learning_rate": 9.887671232876713e-07, "log_odds_chosen": -1.2190876007080078, "log_odds_ratio": -2.4342424869537354, "logits/chosen": -0.9859253168106079, "logits/rejected": -1.0443532466888428, "logps/chosen": -4.782563209533691, "logps/rejected": -3.5031044483184814, "loss": 2.8337, "nll_loss": 2.590270519256592, "rewards/accuracies": 0.625, "rewards/chosen": -0.4782562553882599, "rewards/margins": -0.12794584035873413, "rewards/rejected": -0.35031044483184814, "step": 82 }, { "epoch": 0.2272416153319644, "grad_norm": 3.52295184135437, "learning_rate": 9.886301369863015e-07, "log_odds_chosen": -0.7529314756393433, "log_odds_ratio": -1.4200632572174072, "logits/chosen": -0.9664835929870605, "logits/rejected": -0.9698041081428528, "logps/chosen": -3.383063793182373, "logps/rejected": -2.618380308151245, "loss": 2.6544, "nll_loss": 2.5123767852783203, "rewards/accuracies": 0.5, "rewards/chosen": -0.33830639719963074, "rewards/margins": -0.0764683336019516, "rewards/rejected": -0.26183804869651794, "step": 83 }, { "epoch": 0.2299794661190965, "grad_norm": 3.4980788230895996, "learning_rate": 9.884931506849315e-07, "log_odds_chosen": 0.0035132914781570435, "log_odds_ratio": -0.8108493089675903, "logits/chosen": -0.9365254640579224, "logits/rejected": -0.9064285755157471, "logps/chosen": -2.7716665267944336, "logps/rejected": -2.775862455368042, "loss": 2.5819, "nll_loss": 2.500821352005005, "rewards/accuracies": 0.5, "rewards/chosen": -0.2771666646003723, "rewards/margins": 0.0004195757210254669, "rewards/rejected": -0.2775862514972687, "step": 84 }, { "epoch": 0.2327173169062286, "grad_norm": 3.180957078933716, "learning_rate": 9.883561643835617e-07, "log_odds_chosen": -0.43242311477661133, "log_odds_ratio": -1.072009563446045, "logits/chosen": -0.9522624611854553, "logits/rejected": -0.931322455406189, "logps/chosen": -2.7039170265197754, "logps/rejected": -2.280003309249878, "loss": 2.5313, "nll_loss": 2.4240801334381104, "rewards/accuracies": 0.375, "rewards/chosen": -0.27039170265197754, "rewards/margins": -0.042391352355480194, "rewards/rejected": -0.22800032794475555, "step": 85 }, { "epoch": 0.23545516769336072, "grad_norm": 3.5718564987182617, "learning_rate": 9.882191780821917e-07, "log_odds_chosen": -1.0654886960983276, "log_odds_ratio": -1.7818694114685059, "logits/chosen": -0.9887776970863342, "logits/rejected": -0.9653372764587402, "logps/chosen": -3.667973756790161, "logps/rejected": -2.5781402587890625, "loss": 2.6395, "nll_loss": 2.461360454559326, "rewards/accuracies": 0.625, "rewards/chosen": -0.36679738759994507, "rewards/margins": -0.10898337513208389, "rewards/rejected": -0.2578140199184418, "step": 86 }, { "epoch": 0.23819301848049282, "grad_norm": 3.592374324798584, "learning_rate": 9.880821917808219e-07, "log_odds_chosen": -0.37165409326553345, "log_odds_ratio": -1.0463296175003052, "logits/chosen": -0.9634068012237549, "logits/rejected": -0.9645075798034668, "logps/chosen": -2.7689929008483887, "logps/rejected": -2.3980302810668945, "loss": 2.592, "nll_loss": 2.487388849258423, "rewards/accuracies": 0.5, "rewards/chosen": -0.2768992781639099, "rewards/margins": -0.03709625452756882, "rewards/rejected": -0.2398030310869217, "step": 87 }, { "epoch": 0.24093086926762491, "grad_norm": 4.088083744049072, "learning_rate": 9.87945205479452e-07, "log_odds_chosen": -0.1633896827697754, "log_odds_ratio": -1.2804120779037476, "logits/chosen": -1.0442421436309814, "logits/rejected": -1.0011667013168335, "logps/chosen": -3.370375156402588, "logps/rejected": -3.193755626678467, "loss": 2.7272, "nll_loss": 2.5991265773773193, "rewards/accuracies": 0.625, "rewards/chosen": -0.3370375335216522, "rewards/margins": -0.017661944031715393, "rewards/rejected": -0.31937557458877563, "step": 88 }, { "epoch": 0.243668720054757, "grad_norm": 3.5234291553497314, "learning_rate": 9.87808219178082e-07, "log_odds_chosen": 1.0557336807250977, "log_odds_ratio": -0.8775650858879089, "logits/chosen": -0.9800406098365784, "logits/rejected": -1.0442568063735962, "logps/chosen": -2.730332374572754, "logps/rejected": -3.711589813232422, "loss": 2.5076, "nll_loss": 2.4198529720306396, "rewards/accuracies": 0.625, "rewards/chosen": -0.2730332016944885, "rewards/margins": 0.09812574833631516, "rewards/rejected": -0.37115898728370667, "step": 89 }, { "epoch": 0.2464065708418891, "grad_norm": 3.7772796154022217, "learning_rate": 9.876712328767123e-07, "log_odds_chosen": -0.5407523512840271, "log_odds_ratio": -1.2336208820343018, "logits/chosen": -1.0181456804275513, "logits/rejected": -1.009132742881775, "logps/chosen": -3.6045305728912354, "logps/rejected": -3.0435147285461426, "loss": 2.6757, "nll_loss": 2.552302122116089, "rewards/accuracies": 0.375, "rewards/chosen": -0.3604530692100525, "rewards/margins": -0.05610157549381256, "rewards/rejected": -0.3043515086174011, "step": 90 }, { "epoch": 0.24914442162902123, "grad_norm": 3.5059757232666016, "learning_rate": 9.875342465753425e-07, "log_odds_chosen": 0.17386320233345032, "log_odds_ratio": -0.7509975433349609, "logits/chosen": -1.0342501401901245, "logits/rejected": -1.0920852422714233, "logps/chosen": -2.4881484508514404, "logps/rejected": -2.642690896987915, "loss": 2.5643, "nll_loss": 2.489208221435547, "rewards/accuracies": 0.625, "rewards/chosen": -0.24881483614444733, "rewards/margins": 0.015454242005944252, "rewards/rejected": -0.264269083738327, "step": 91 }, { "epoch": 0.2518822724161533, "grad_norm": 3.590535879135132, "learning_rate": 9.873972602739725e-07, "log_odds_chosen": 0.28779998421669006, "log_odds_ratio": -0.7788072824478149, "logits/chosen": -0.8657885193824768, "logits/rejected": -0.855984091758728, "logps/chosen": -2.756242275238037, "logps/rejected": -3.0315353870391846, "loss": 2.5428, "nll_loss": 2.4649391174316406, "rewards/accuracies": 0.75, "rewards/chosen": -0.27562424540519714, "rewards/margins": 0.027529295533895493, "rewards/rejected": -0.30315354466438293, "step": 92 }, { "epoch": 0.2546201232032854, "grad_norm": 3.845461130142212, "learning_rate": 9.872602739726027e-07, "log_odds_chosen": -0.6494462490081787, "log_odds_ratio": -1.1787490844726562, "logits/chosen": -1.1239519119262695, "logits/rejected": -0.9931648969650269, "logps/chosen": -2.5227832794189453, "logps/rejected": -1.9176204204559326, "loss": 2.7153, "nll_loss": 2.5973997116088867, "rewards/accuracies": 0.375, "rewards/chosen": -0.25227832794189453, "rewards/margins": -0.06051630154252052, "rewards/rejected": -0.1917620301246643, "step": 93 }, { "epoch": 0.25735797399041754, "grad_norm": 3.4071860313415527, "learning_rate": 9.871232876712329e-07, "log_odds_chosen": 0.06700210273265839, "log_odds_ratio": -0.7826315760612488, "logits/chosen": -0.9899495840072632, "logits/rejected": -1.007636308670044, "logps/chosen": -2.787461519241333, "logps/rejected": -2.8284573554992676, "loss": 2.5531, "nll_loss": 2.4748547077178955, "rewards/accuracies": 0.5, "rewards/chosen": -0.2787461578845978, "rewards/margins": 0.004099583253264427, "rewards/rejected": -0.28284573554992676, "step": 94 }, { "epoch": 0.2600958247775496, "grad_norm": 4.004858016967773, "learning_rate": 9.869863013698629e-07, "log_odds_chosen": -0.9739891290664673, "log_odds_ratio": -1.5872565507888794, "logits/chosen": -1.0457762479782104, "logits/rejected": -1.0311870574951172, "logps/chosen": -4.5525221824646, "logps/rejected": -3.601001501083374, "loss": 2.8216, "nll_loss": 2.6628799438476562, "rewards/accuracies": 0.375, "rewards/chosen": -0.45525217056274414, "rewards/margins": -0.09515205770730972, "rewards/rejected": -0.3601001501083374, "step": 95 }, { "epoch": 0.26283367556468173, "grad_norm": 3.18113374710083, "learning_rate": 9.86849315068493e-07, "log_odds_chosen": 0.1859194040298462, "log_odds_ratio": -0.6846587657928467, "logits/chosen": -0.9175974130630493, "logits/rejected": -0.9829642176628113, "logps/chosen": -2.250458002090454, "logps/rejected": -2.358224391937256, "loss": 2.4557, "nll_loss": 2.387206554412842, "rewards/accuracies": 0.625, "rewards/chosen": -0.22504578530788422, "rewards/margins": 0.01077666413038969, "rewards/rejected": -0.23582243919372559, "step": 96 }, { "epoch": 0.2655715263518138, "grad_norm": 3.3075687885284424, "learning_rate": 9.867123287671233e-07, "log_odds_chosen": -0.13248272240161896, "log_odds_ratio": -0.8753066062927246, "logits/chosen": -1.0783147811889648, "logits/rejected": -1.097089409828186, "logps/chosen": -3.5713250637054443, "logps/rejected": -3.402148723602295, "loss": 2.4379, "nll_loss": 2.3503692150115967, "rewards/accuracies": 0.625, "rewards/chosen": -0.35713255405426025, "rewards/margins": -0.016917631030082703, "rewards/rejected": -0.34021490812301636, "step": 97 }, { "epoch": 0.2683093771389459, "grad_norm": 3.5943498611450195, "learning_rate": 9.865753424657533e-07, "log_odds_chosen": -0.601640522480011, "log_odds_ratio": -1.2091643810272217, "logits/chosen": -1.0574804544448853, "logits/rejected": -1.1006596088409424, "logps/chosen": -3.3202245235443115, "logps/rejected": -2.7050652503967285, "loss": 2.5844, "nll_loss": 2.4634933471679688, "rewards/accuracies": 0.375, "rewards/chosen": -0.332022488117218, "rewards/margins": -0.061515942215919495, "rewards/rejected": -0.2705065608024597, "step": 98 }, { "epoch": 0.27104722792607805, "grad_norm": 3.6736812591552734, "learning_rate": 9.864383561643835e-07, "log_odds_chosen": -0.13918939232826233, "log_odds_ratio": -1.0015945434570312, "logits/chosen": -1.0390197038650513, "logits/rejected": -1.067232370376587, "logps/chosen": -3.2177557945251465, "logps/rejected": -3.034931182861328, "loss": 2.5516, "nll_loss": 2.4514708518981934, "rewards/accuracies": 0.75, "rewards/chosen": -0.32177555561065674, "rewards/margins": -0.018282450735569, "rewards/rejected": -0.3034931421279907, "step": 99 }, { "epoch": 0.2737850787132101, "grad_norm": 3.693080425262451, "learning_rate": 9.863013698630137e-07, "log_odds_chosen": -0.48497697710990906, "log_odds_ratio": -1.099692702293396, "logits/chosen": -1.0127052068710327, "logits/rejected": -1.030796766281128, "logps/chosen": -3.0201330184936523, "logps/rejected": -2.5347280502319336, "loss": 2.547, "nll_loss": 2.4370133876800537, "rewards/accuracies": 0.375, "rewards/chosen": -0.3020133376121521, "rewards/margins": -0.048540495336055756, "rewards/rejected": -0.25347280502319336, "step": 100 }, { "epoch": 0.27652292950034224, "grad_norm": 3.140319585800171, "learning_rate": 9.861643835616437e-07, "log_odds_chosen": 1.1084911823272705, "log_odds_ratio": -0.612861156463623, "logits/chosen": -0.8817093968391418, "logits/rejected": -0.9998610019683838, "logps/chosen": -2.096961498260498, "logps/rejected": -3.1262896060943604, "loss": 2.4543, "nll_loss": 2.3929860591888428, "rewards/accuracies": 0.75, "rewards/chosen": -0.20969617366790771, "rewards/margins": 0.10293281078338623, "rewards/rejected": -0.31262898445129395, "step": 101 }, { "epoch": 0.2792607802874743, "grad_norm": 3.619478940963745, "learning_rate": 9.860273972602739e-07, "log_odds_chosen": 0.23079223930835724, "log_odds_ratio": -1.0416020154953003, "logits/chosen": -1.0878859758377075, "logits/rejected": -1.0929261445999146, "logps/chosen": -3.3047428131103516, "logps/rejected": -3.5002615451812744, "loss": 2.5895, "nll_loss": 2.485302448272705, "rewards/accuracies": 0.625, "rewards/chosen": -0.33047428727149963, "rewards/margins": 0.019551869481801987, "rewards/rejected": -0.35002613067626953, "step": 102 }, { "epoch": 0.28199863107460643, "grad_norm": 3.1560797691345215, "learning_rate": 9.85890410958904e-07, "log_odds_chosen": 0.36280179023742676, "log_odds_ratio": -0.6516567468643188, "logits/chosen": -1.1010551452636719, "logits/rejected": -1.1288232803344727, "logps/chosen": -2.36332106590271, "logps/rejected": -2.6845510005950928, "loss": 2.4269, "nll_loss": 2.3617725372314453, "rewards/accuracies": 0.75, "rewards/chosen": -0.23633211851119995, "rewards/margins": 0.032123006880283356, "rewards/rejected": -0.2684551179409027, "step": 103 }, { "epoch": 0.28473648186173856, "grad_norm": 3.679903984069824, "learning_rate": 9.857534246575343e-07, "log_odds_chosen": -0.5620570182800293, "log_odds_ratio": -1.365160346031189, "logits/chosen": -0.8827995657920837, "logits/rejected": -0.9904699325561523, "logps/chosen": -3.6675493717193604, "logps/rejected": -3.0476999282836914, "loss": 2.569, "nll_loss": 2.4324891567230225, "rewards/accuracies": 0.625, "rewards/chosen": -0.3667549192905426, "rewards/margins": -0.061984915286302567, "rewards/rejected": -0.30476999282836914, "step": 104 }, { "epoch": 0.2874743326488706, "grad_norm": 3.25508713722229, "learning_rate": 9.856164383561643e-07, "log_odds_chosen": 0.22682234644889832, "log_odds_ratio": -0.74211585521698, "logits/chosen": -0.8265759348869324, "logits/rejected": -0.9570423364639282, "logps/chosen": -2.320934295654297, "logps/rejected": -2.4660422801971436, "loss": 2.3841, "nll_loss": 2.3099334239959717, "rewards/accuracies": 0.75, "rewards/chosen": -0.2320934236049652, "rewards/margins": 0.01451079547405243, "rewards/rejected": -0.24660423398017883, "step": 105 }, { "epoch": 0.29021218343600275, "grad_norm": 3.919428586959839, "learning_rate": 9.854794520547945e-07, "log_odds_chosen": -0.8288116455078125, "log_odds_ratio": -1.4165081977844238, "logits/chosen": -0.9699836373329163, "logits/rejected": -0.9509096145629883, "logps/chosen": -3.6719589233398438, "logps/rejected": -2.827916145324707, "loss": 2.6901, "nll_loss": 2.548405170440674, "rewards/accuracies": 0.375, "rewards/chosen": -0.36719590425491333, "rewards/margins": -0.08440427482128143, "rewards/rejected": -0.2827916443347931, "step": 106 }, { "epoch": 0.2929500342231348, "grad_norm": 3.186791181564331, "learning_rate": 9.853424657534247e-07, "log_odds_chosen": -0.4840199947357178, "log_odds_ratio": -1.1082868576049805, "logits/chosen": -0.963905930519104, "logits/rejected": -0.9936773777008057, "logps/chosen": -2.4596996307373047, "logps/rejected": -1.9590284824371338, "loss": 2.3693, "nll_loss": 2.25852108001709, "rewards/accuracies": 0.625, "rewards/chosen": -0.2459699660539627, "rewards/margins": -0.050067104399204254, "rewards/rejected": -0.19590285420417786, "step": 107 }, { "epoch": 0.29568788501026694, "grad_norm": 3.975125551223755, "learning_rate": 9.852054794520547e-07, "log_odds_chosen": -0.34922513365745544, "log_odds_ratio": -1.1003246307373047, "logits/chosen": -0.9964873194694519, "logits/rejected": -0.9463049173355103, "logps/chosen": -3.6797561645507812, "logps/rejected": -3.317317247390747, "loss": 2.6651, "nll_loss": 2.555100440979004, "rewards/accuracies": 0.5, "rewards/chosen": -0.3679756224155426, "rewards/margins": -0.03624391928315163, "rewards/rejected": -0.33173173666000366, "step": 108 }, { "epoch": 0.29842573579739906, "grad_norm": 3.3316032886505127, "learning_rate": 9.850684931506849e-07, "log_odds_chosen": 0.1925189197063446, "log_odds_ratio": -0.715061604976654, "logits/chosen": -0.9725520610809326, "logits/rejected": -1.0007684230804443, "logps/chosen": -2.312516927719116, "logps/rejected": -2.4641754627227783, "loss": 2.4905, "nll_loss": 2.4189486503601074, "rewards/accuracies": 0.625, "rewards/chosen": -0.23125170171260834, "rewards/margins": 0.015165869146585464, "rewards/rejected": -0.2464175820350647, "step": 109 }, { "epoch": 0.30116358658453113, "grad_norm": 4.013968467712402, "learning_rate": 9.84931506849315e-07, "log_odds_chosen": -0.20819005370140076, "log_odds_ratio": -0.928492546081543, "logits/chosen": -1.0888571739196777, "logits/rejected": -1.0356433391571045, "logps/chosen": -3.4749960899353027, "logps/rejected": -3.239190101623535, "loss": 2.5943, "nll_loss": 2.5014026165008545, "rewards/accuracies": 0.375, "rewards/chosen": -0.3474996089935303, "rewards/margins": -0.023580607026815414, "rewards/rejected": -0.32391899824142456, "step": 110 }, { "epoch": 0.30390143737166325, "grad_norm": 3.185412883758545, "learning_rate": 9.847945205479453e-07, "log_odds_chosen": 0.08584249764680862, "log_odds_ratio": -0.6989541053771973, "logits/chosen": -0.9827895164489746, "logits/rejected": -1.040583610534668, "logps/chosen": -2.6553189754486084, "logps/rejected": -2.703808546066284, "loss": 2.4184, "nll_loss": 2.348538875579834, "rewards/accuracies": 0.625, "rewards/chosen": -0.26553189754486084, "rewards/margins": 0.0048489440232515335, "rewards/rejected": -0.2703808546066284, "step": 111 }, { "epoch": 0.3066392881587953, "grad_norm": 3.974710702896118, "learning_rate": 9.846575342465753e-07, "log_odds_chosen": -1.5320407152175903, "log_odds_ratio": -2.097407817840576, "logits/chosen": -0.937651515007019, "logits/rejected": -0.9331146478652954, "logps/chosen": -4.478943824768066, "logps/rejected": -2.9395804405212402, "loss": 2.7594, "nll_loss": 2.549704074859619, "rewards/accuracies": 0.375, "rewards/chosen": -0.4478943645954132, "rewards/margins": -0.15393629670143127, "rewards/rejected": -0.29395806789398193, "step": 112 }, { "epoch": 0.30937713894592744, "grad_norm": 3.119691848754883, "learning_rate": 9.845205479452055e-07, "log_odds_chosen": 1.0133459568023682, "log_odds_ratio": -0.6512066721916199, "logits/chosen": -1.017159342765808, "logits/rejected": -1.1504977941513062, "logps/chosen": -2.1911630630493164, "logps/rejected": -3.164001941680908, "loss": 2.3596, "nll_loss": 2.2945172786712646, "rewards/accuracies": 0.625, "rewards/chosen": -0.21911630034446716, "rewards/margins": 0.09728391468524933, "rewards/rejected": -0.3164002299308777, "step": 113 }, { "epoch": 0.31211498973305957, "grad_norm": 3.9737257957458496, "learning_rate": 9.843835616438357e-07, "log_odds_chosen": -0.7287889719009399, "log_odds_ratio": -1.2685682773590088, "logits/chosen": -1.002798080444336, "logits/rejected": -0.9846369028091431, "logps/chosen": -3.4006309509277344, "logps/rejected": -2.671844005584717, "loss": 2.6341, "nll_loss": 2.5072884559631348, "rewards/accuracies": 0.375, "rewards/chosen": -0.3400631248950958, "rewards/margins": -0.07287866622209549, "rewards/rejected": -0.26718443632125854, "step": 114 }, { "epoch": 0.31485284052019163, "grad_norm": 4.355760097503662, "learning_rate": 9.842465753424657e-07, "log_odds_chosen": -1.972926378250122, "log_odds_ratio": -2.278045892715454, "logits/chosen": -1.0431792736053467, "logits/rejected": -0.9483581781387329, "logps/chosen": -4.827139377593994, "logps/rejected": -2.9171152114868164, "loss": 2.9412, "nll_loss": 2.713435173034668, "rewards/accuracies": 0.125, "rewards/chosen": -0.4827139675617218, "rewards/margins": -0.19100245833396912, "rewards/rejected": -0.2917115092277527, "step": 115 }, { "epoch": 0.31759069130732376, "grad_norm": 3.7558512687683105, "learning_rate": 9.84109589041096e-07, "log_odds_chosen": -0.2169879674911499, "log_odds_ratio": -0.9883148670196533, "logits/chosen": -0.979587197303772, "logits/rejected": -1.0118529796600342, "logps/chosen": -2.6798346042633057, "logps/rejected": -2.461169719696045, "loss": 2.6003, "nll_loss": 2.5014443397521973, "rewards/accuracies": 0.625, "rewards/chosen": -0.26798346638679504, "rewards/margins": -0.021866511553525925, "rewards/rejected": -0.24611696600914001, "step": 116 }, { "epoch": 0.3203285420944558, "grad_norm": 3.4918909072875977, "learning_rate": 9.83972602739726e-07, "log_odds_chosen": 0.07307653874158859, "log_odds_ratio": -0.7599548101425171, "logits/chosen": -0.970568835735321, "logits/rejected": -1.0289943218231201, "logps/chosen": -2.8191888332366943, "logps/rejected": -2.8821167945861816, "loss": 2.533, "nll_loss": 2.4569597244262695, "rewards/accuracies": 0.375, "rewards/chosen": -0.28191888332366943, "rewards/margins": 0.006292782723903656, "rewards/rejected": -0.2882116734981537, "step": 117 }, { "epoch": 0.32306639288158795, "grad_norm": 4.015537261962891, "learning_rate": 9.83835616438356e-07, "log_odds_chosen": -0.34519368410110474, "log_odds_ratio": -1.0832663774490356, "logits/chosen": -1.026429295539856, "logits/rejected": -0.9796541929244995, "logps/chosen": -3.8120503425598145, "logps/rejected": -3.46943998336792, "loss": 2.6884, "nll_loss": 2.580045700073242, "rewards/accuracies": 0.375, "rewards/chosen": -0.3812050521373749, "rewards/margins": -0.034261055290699005, "rewards/rejected": -0.34694400429725647, "step": 118 }, { "epoch": 0.3258042436687201, "grad_norm": 4.054460048675537, "learning_rate": 9.836986301369863e-07, "log_odds_chosen": -0.49750980734825134, "log_odds_ratio": -1.389731526374817, "logits/chosen": -0.9961137771606445, "logits/rejected": -0.9880985021591187, "logps/chosen": -3.9688796997070312, "logps/rejected": -3.471536159515381, "loss": 2.7088, "nll_loss": 2.5698091983795166, "rewards/accuracies": 0.5, "rewards/chosen": -0.39688795804977417, "rewards/margins": -0.04973435401916504, "rewards/rejected": -0.3471536338329315, "step": 119 }, { "epoch": 0.32854209445585214, "grad_norm": 3.8061745166778564, "learning_rate": 9.835616438356163e-07, "log_odds_chosen": -1.2569694519042969, "log_odds_ratio": -1.7923458814620972, "logits/chosen": -1.1331326961517334, "logits/rejected": -1.1526155471801758, "logps/chosen": -3.775611639022827, "logps/rejected": -2.5085599422454834, "loss": 2.6614, "nll_loss": 2.482147693634033, "rewards/accuracies": 0.375, "rewards/chosen": -0.37756115198135376, "rewards/margins": -0.12670518457889557, "rewards/rejected": -0.2508559823036194, "step": 120 }, { "epoch": 0.33127994524298426, "grad_norm": 3.342951774597168, "learning_rate": 9.834246575342465e-07, "log_odds_chosen": -0.11688204109668732, "log_odds_ratio": -0.892427921295166, "logits/chosen": -1.0117907524108887, "logits/rejected": -1.0416163206100464, "logps/chosen": -2.84859299659729, "logps/rejected": -2.7213151454925537, "loss": 2.4658, "nll_loss": 2.3765194416046143, "rewards/accuracies": 0.625, "rewards/chosen": -0.2848593294620514, "rewards/margins": -0.012727789580821991, "rewards/rejected": -0.2721315324306488, "step": 121 }, { "epoch": 0.33401779603011633, "grad_norm": 4.232852935791016, "learning_rate": 9.832876712328767e-07, "log_odds_chosen": -0.43300867080688477, "log_odds_ratio": -1.4250351190567017, "logits/chosen": -0.993843138217926, "logits/rejected": -0.9531700611114502, "logps/chosen": -3.6364920139312744, "logps/rejected": -3.228724479675293, "loss": 2.7336, "nll_loss": 2.591048240661621, "rewards/accuracies": 0.25, "rewards/chosen": -0.3636491894721985, "rewards/margins": -0.04077676683664322, "rewards/rejected": -0.32287245988845825, "step": 122 }, { "epoch": 0.33675564681724846, "grad_norm": 3.5749781131744385, "learning_rate": 9.831506849315067e-07, "log_odds_chosen": -0.5039313435554504, "log_odds_ratio": -1.2754706144332886, "logits/chosen": -0.9400320053100586, "logits/rejected": -0.8928335905075073, "logps/chosen": -3.4663825035095215, "logps/rejected": -2.9255318641662598, "loss": 2.5899, "nll_loss": 2.462348699569702, "rewards/accuracies": 0.5, "rewards/chosen": -0.3466382622718811, "rewards/margins": -0.05408506095409393, "rewards/rejected": -0.292553186416626, "step": 123 }, { "epoch": 0.3394934976043806, "grad_norm": 3.3037710189819336, "learning_rate": 9.83013698630137e-07, "log_odds_chosen": -0.020853757858276367, "log_odds_ratio": -0.9548428058624268, "logits/chosen": -0.9478827714920044, "logits/rejected": -1.0183451175689697, "logps/chosen": -2.7154340744018555, "logps/rejected": -2.6419529914855957, "loss": 2.545, "nll_loss": 2.44950795173645, "rewards/accuracies": 0.75, "rewards/chosen": -0.27154338359832764, "rewards/margins": -0.007348105311393738, "rewards/rejected": -0.2641952931880951, "step": 124 }, { "epoch": 0.34223134839151265, "grad_norm": 4.907024383544922, "learning_rate": 9.828767123287671e-07, "log_odds_chosen": -2.044884443283081, "log_odds_ratio": -2.398036003112793, "logits/chosen": -1.0146313905715942, "logits/rejected": -0.9175862669944763, "logps/chosen": -5.2448649406433105, "logps/rejected": -3.247389316558838, "loss": 3.0023, "nll_loss": 2.762537956237793, "rewards/accuracies": 0.125, "rewards/chosen": -0.5244864821434021, "rewards/margins": -0.19974756240844727, "rewards/rejected": -0.32473891973495483, "step": 125 }, { "epoch": 0.34496919917864477, "grad_norm": 4.329273700714111, "learning_rate": 9.82739726027397e-07, "log_odds_chosen": -1.058132529258728, "log_odds_ratio": -1.6879684925079346, "logits/chosen": -0.9262245893478394, "logits/rejected": -0.9328924417495728, "logps/chosen": -4.540834903717041, "logps/rejected": -3.496159553527832, "loss": 2.7664, "nll_loss": 2.5975847244262695, "rewards/accuracies": 0.25, "rewards/chosen": -0.45408353209495544, "rewards/margins": -0.10446757078170776, "rewards/rejected": -0.3496159315109253, "step": 126 }, { "epoch": 0.34770704996577684, "grad_norm": 4.322659492492676, "learning_rate": 9.826027397260273e-07, "log_odds_chosen": -1.1281095743179321, "log_odds_ratio": -1.7006577253341675, "logits/chosen": -1.0466208457946777, "logits/rejected": -1.0094547271728516, "logps/chosen": -4.533938407897949, "logps/rejected": -3.402451276779175, "loss": 2.8169, "nll_loss": 2.646843433380127, "rewards/accuracies": 0.375, "rewards/chosen": -0.4533938765525818, "rewards/margins": -0.11314871907234192, "rewards/rejected": -0.3402451276779175, "step": 127 }, { "epoch": 0.35044490075290896, "grad_norm": 4.066173553466797, "learning_rate": 9.824657534246575e-07, "log_odds_chosen": -1.0748696327209473, "log_odds_ratio": -1.4345958232879639, "logits/chosen": -1.0842915773391724, "logits/rejected": -1.0172605514526367, "logps/chosen": -3.4919495582580566, "logps/rejected": -2.48044490814209, "loss": 2.702, "nll_loss": 2.558537006378174, "rewards/accuracies": 0.125, "rewards/chosen": -0.3491949439048767, "rewards/margins": -0.10115043818950653, "rewards/rejected": -0.24804452061653137, "step": 128 }, { "epoch": 0.3531827515400411, "grad_norm": 3.72782564163208, "learning_rate": 9.823287671232875e-07, "log_odds_chosen": -0.4605371952056885, "log_odds_ratio": -1.0050482749938965, "logits/chosen": -1.0677599906921387, "logits/rejected": -1.0298247337341309, "logps/chosen": -2.8983240127563477, "logps/rejected": -2.448608636856079, "loss": 2.5534, "nll_loss": 2.4528722763061523, "rewards/accuracies": 0.25, "rewards/chosen": -0.2898324131965637, "rewards/margins": -0.044971540570259094, "rewards/rejected": -0.24486085772514343, "step": 129 }, { "epoch": 0.35592060232717315, "grad_norm": 3.7137064933776855, "learning_rate": 9.821917808219177e-07, "log_odds_chosen": -0.12341967225074768, "log_odds_ratio": -1.0519436597824097, "logits/chosen": -1.062656283378601, "logits/rejected": -1.0923658609390259, "logps/chosen": -3.009211778640747, "logps/rejected": -2.8433616161346436, "loss": 2.5413, "nll_loss": 2.436110019683838, "rewards/accuracies": 0.5, "rewards/chosen": -0.30092117190361023, "rewards/margins": -0.016584988683462143, "rewards/rejected": -0.2843361794948578, "step": 130 }, { "epoch": 0.3586584531143053, "grad_norm": 3.9777534008026123, "learning_rate": 9.82054794520548e-07, "log_odds_chosen": -0.7902810573577881, "log_odds_ratio": -1.284623146057129, "logits/chosen": -1.0648274421691895, "logits/rejected": -1.0160682201385498, "logps/chosen": -3.2186708450317383, "logps/rejected": -2.4604923725128174, "loss": 2.7419, "nll_loss": 2.613464593887329, "rewards/accuracies": 0.375, "rewards/chosen": -0.32186707854270935, "rewards/margins": -0.07581785321235657, "rewards/rejected": -0.24604922533035278, "step": 131 }, { "epoch": 0.3613963039014374, "grad_norm": 2.994683027267456, "learning_rate": 9.819178082191781e-07, "log_odds_chosen": 0.9639139175415039, "log_odds_ratio": -0.7106072306632996, "logits/chosen": -0.7931936383247375, "logits/rejected": -0.8937540054321289, "logps/chosen": -2.276967763900757, "logps/rejected": -3.136530876159668, "loss": 2.324, "nll_loss": 2.2529633045196533, "rewards/accuracies": 0.75, "rewards/chosen": -0.22769679129123688, "rewards/margins": 0.08595628291368484, "rewards/rejected": -0.3136530816555023, "step": 132 }, { "epoch": 0.36413415468856947, "grad_norm": 3.7669692039489746, "learning_rate": 9.817808219178081e-07, "log_odds_chosen": 0.41143694519996643, "log_odds_ratio": -0.9011331796646118, "logits/chosen": -0.9454556703567505, "logits/rejected": -0.9958363771438599, "logps/chosen": -2.7831904888153076, "logps/rejected": -3.1857879161834717, "loss": 2.6159, "nll_loss": 2.5257976055145264, "rewards/accuracies": 0.375, "rewards/chosen": -0.2783190608024597, "rewards/margins": 0.040259722620248795, "rewards/rejected": -0.3185787796974182, "step": 133 }, { "epoch": 0.3668720054757016, "grad_norm": 4.698821067810059, "learning_rate": 9.816438356164383e-07, "log_odds_chosen": -1.3947482109069824, "log_odds_ratio": -1.9909785985946655, "logits/chosen": -0.9840841293334961, "logits/rejected": -0.9697123169898987, "logps/chosen": -4.600393772125244, "logps/rejected": -3.233961820602417, "loss": 2.9626, "nll_loss": 2.763533115386963, "rewards/accuracies": 0.25, "rewards/chosen": -0.4600393772125244, "rewards/margins": -0.1366431713104248, "rewards/rejected": -0.3233962059020996, "step": 134 }, { "epoch": 0.36960985626283366, "grad_norm": 3.3407888412475586, "learning_rate": 9.815068493150685e-07, "log_odds_chosen": 0.27846527099609375, "log_odds_ratio": -0.609581708908081, "logits/chosen": -0.9223493337631226, "logits/rejected": -1.0088773965835571, "logps/chosen": -2.2927587032318115, "logps/rejected": -2.5338311195373535, "loss": 2.4504, "nll_loss": 2.3894460201263428, "rewards/accuracies": 0.625, "rewards/chosen": -0.22927585244178772, "rewards/margins": 0.02410726062953472, "rewards/rejected": -0.2533831298351288, "step": 135 }, { "epoch": 0.3723477070499658, "grad_norm": 3.6436994075775146, "learning_rate": 9.813698630136985e-07, "log_odds_chosen": 0.5939947962760925, "log_odds_ratio": -0.5123732089996338, "logits/chosen": -0.9839766025543213, "logits/rejected": -0.9892544150352478, "logps/chosen": -2.834688901901245, "logps/rejected": -3.367307662963867, "loss": 2.426, "nll_loss": 2.3747878074645996, "rewards/accuracies": 0.75, "rewards/chosen": -0.28346890211105347, "rewards/margins": 0.05326184630393982, "rewards/rejected": -0.3367307484149933, "step": 136 }, { "epoch": 0.3750855578370979, "grad_norm": 3.748659372329712, "learning_rate": 9.812328767123287e-07, "log_odds_chosen": 0.0007442235946655273, "log_odds_ratio": -0.7988481521606445, "logits/chosen": -0.9709126353263855, "logits/rejected": -1.0249993801116943, "logps/chosen": -2.0990793704986572, "logps/rejected": -2.077833414077759, "loss": 2.51, "nll_loss": 2.4301185607910156, "rewards/accuracies": 0.625, "rewards/chosen": -0.20990794897079468, "rewards/margins": -0.0021245963871479034, "rewards/rejected": -0.20778334140777588, "step": 137 }, { "epoch": 0.37782340862423, "grad_norm": 3.9172651767730713, "learning_rate": 9.81095890410959e-07, "log_odds_chosen": 0.4092816710472107, "log_odds_ratio": -0.5941770076751709, "logits/chosen": -0.9979020357131958, "logits/rejected": -0.9535049200057983, "logps/chosen": -2.68363618850708, "logps/rejected": -3.044491767883301, "loss": 2.5816, "nll_loss": 2.5221424102783203, "rewards/accuracies": 0.625, "rewards/chosen": -0.2683636248111725, "rewards/margins": 0.03608554229140282, "rewards/rejected": -0.3044491708278656, "step": 138 }, { "epoch": 0.3805612594113621, "grad_norm": 2.9544191360473633, "learning_rate": 9.809589041095891e-07, "log_odds_chosen": 0.4867735803127289, "log_odds_ratio": -0.5887541770935059, "logits/chosen": -0.7585177421569824, "logits/rejected": -0.8397403955459595, "logps/chosen": -1.7697572708129883, "logps/rejected": -2.1410837173461914, "loss": 2.229, "nll_loss": 2.1701080799102783, "rewards/accuracies": 0.75, "rewards/chosen": -0.17697572708129883, "rewards/margins": 0.037132639437913895, "rewards/rejected": -0.21410837769508362, "step": 139 }, { "epoch": 0.38329911019849416, "grad_norm": 3.351578712463379, "learning_rate": 9.808219178082191e-07, "log_odds_chosen": -0.054792553186416626, "log_odds_ratio": -0.8900788426399231, "logits/chosen": -0.9306514263153076, "logits/rejected": -0.9582437872886658, "logps/chosen": -3.080561637878418, "logps/rejected": -2.959737539291382, "loss": 2.4205, "nll_loss": 2.3314907550811768, "rewards/accuracies": 0.625, "rewards/chosen": -0.30805617570877075, "rewards/margins": -0.012082386761903763, "rewards/rejected": -0.2959737777709961, "step": 140 }, { "epoch": 0.3860369609856263, "grad_norm": 3.721665382385254, "learning_rate": 9.806849315068493e-07, "log_odds_chosen": -0.09584300220012665, "log_odds_ratio": -0.8203674554824829, "logits/chosen": -0.9633460640907288, "logits/rejected": -0.9504889845848083, "logps/chosen": -2.5108776092529297, "logps/rejected": -2.4211573600769043, "loss": 2.5425, "nll_loss": 2.4604837894439697, "rewards/accuracies": 0.5, "rewards/chosen": -0.2510877549648285, "rewards/margins": -0.008972017094492912, "rewards/rejected": -0.24211573600769043, "step": 141 }, { "epoch": 0.3887748117727584, "grad_norm": 4.023640155792236, "learning_rate": 9.805479452054795e-07, "log_odds_chosen": -0.5730534195899963, "log_odds_ratio": -1.523244023323059, "logits/chosen": -0.9672691822052002, "logits/rejected": -1.0124071836471558, "logps/chosen": -4.38881254196167, "logps/rejected": -3.7752583026885986, "loss": 2.6183, "nll_loss": 2.4659698009490967, "rewards/accuracies": 0.375, "rewards/chosen": -0.4388812780380249, "rewards/margins": -0.061355434358119965, "rewards/rejected": -0.37752586603164673, "step": 142 }, { "epoch": 0.3915126625598905, "grad_norm": 3.373185157775879, "learning_rate": 9.804109589041095e-07, "log_odds_chosen": -0.1967618614435196, "log_odds_ratio": -0.8809226155281067, "logits/chosen": -1.0393353700637817, "logits/rejected": -1.0604230165481567, "logps/chosen": -2.894252061843872, "logps/rejected": -2.6767876148223877, "loss": 2.3986, "nll_loss": 2.3105268478393555, "rewards/accuracies": 0.375, "rewards/chosen": -0.28942519426345825, "rewards/margins": -0.021746419370174408, "rewards/rejected": -0.26767879724502563, "step": 143 }, { "epoch": 0.3942505133470226, "grad_norm": 3.899747371673584, "learning_rate": 9.802739726027397e-07, "log_odds_chosen": -0.10489356517791748, "log_odds_ratio": -0.9334206581115723, "logits/chosen": -1.032693862915039, "logits/rejected": -0.979771614074707, "logps/chosen": -2.635768175125122, "logps/rejected": -2.5128486156463623, "loss": 2.6022, "nll_loss": 2.5088553428649902, "rewards/accuracies": 0.25, "rewards/chosen": -0.26357683539390564, "rewards/margins": -0.012291960418224335, "rewards/rejected": -0.2512848675251007, "step": 144 }, { "epoch": 0.39698836413415467, "grad_norm": 3.443882465362549, "learning_rate": 9.8013698630137e-07, "log_odds_chosen": 1.6643991470336914, "log_odds_ratio": -0.3079368472099304, "logits/chosen": -0.8830698728561401, "logits/rejected": -1.007117509841919, "logps/chosen": -2.0038089752197266, "logps/rejected": -3.552250862121582, "loss": 2.3833, "nll_loss": 2.3525309562683105, "rewards/accuracies": 0.875, "rewards/chosen": -0.20038089156150818, "rewards/margins": 0.15484420955181122, "rewards/rejected": -0.3552250862121582, "step": 145 }, { "epoch": 0.3997262149212868, "grad_norm": 3.9361937046051025, "learning_rate": 9.8e-07, "log_odds_chosen": -0.7414035797119141, "log_odds_ratio": -1.2616384029388428, "logits/chosen": -1.1336967945098877, "logits/rejected": -1.0951836109161377, "logps/chosen": -3.366010904312134, "logps/rejected": -2.6171045303344727, "loss": 2.5974, "nll_loss": 2.471224069595337, "rewards/accuracies": 0.25, "rewards/chosen": -0.3366011381149292, "rewards/margins": -0.07489064335823059, "rewards/rejected": -0.2617104649543762, "step": 146 }, { "epoch": 0.4024640657084189, "grad_norm": 3.7853944301605225, "learning_rate": 9.798630136986301e-07, "log_odds_chosen": -0.14412802457809448, "log_odds_ratio": -1.0724812746047974, "logits/chosen": -0.9365975856781006, "logits/rejected": -0.9680098295211792, "logps/chosen": -3.58191180229187, "logps/rejected": -3.3961734771728516, "loss": 2.5309, "nll_loss": 2.4236531257629395, "rewards/accuracies": 0.5, "rewards/chosen": -0.35819119215011597, "rewards/margins": -0.01857387274503708, "rewards/rejected": -0.3396173417568207, "step": 147 }, { "epoch": 0.405201916495551, "grad_norm": 4.2575764656066895, "learning_rate": 9.797260273972601e-07, "log_odds_chosen": -0.26467692852020264, "log_odds_ratio": -1.1336698532104492, "logits/chosen": -0.8751269578933716, "logits/rejected": -0.8775174617767334, "logps/chosen": -3.5274946689605713, "logps/rejected": -3.2472119331359863, "loss": 2.7051, "nll_loss": 2.59177565574646, "rewards/accuracies": 0.5, "rewards/chosen": -0.35274946689605713, "rewards/margins": -0.02802826091647148, "rewards/rejected": -0.32472121715545654, "step": 148 }, { "epoch": 0.4079397672826831, "grad_norm": 3.588904857635498, "learning_rate": 9.795890410958903e-07, "log_odds_chosen": -0.16217470169067383, "log_odds_ratio": -1.0144822597503662, "logits/chosen": -0.9715244770050049, "logits/rejected": -1.0043455362319946, "logps/chosen": -3.1404638290405273, "logps/rejected": -2.878408670425415, "loss": 2.5141, "nll_loss": 2.4126715660095215, "rewards/accuracies": 0.625, "rewards/chosen": -0.3140464127063751, "rewards/margins": -0.026205524802207947, "rewards/rejected": -0.287840873003006, "step": 149 }, { "epoch": 0.4106776180698152, "grad_norm": 3.552358627319336, "learning_rate": 9.794520547945205e-07, "log_odds_chosen": 0.48731815814971924, "log_odds_ratio": -0.6171642541885376, "logits/chosen": -1.056907057762146, "logits/rejected": -1.0643813610076904, "logps/chosen": -2.4795916080474854, "logps/rejected": -2.9132704734802246, "loss": 2.372, "nll_loss": 2.310271978378296, "rewards/accuracies": 0.5, "rewards/chosen": -0.24795915186405182, "rewards/margins": 0.043367911130189896, "rewards/rejected": -0.2913270592689514, "step": 150 }, { "epoch": 0.4134154688569473, "grad_norm": 4.104135513305664, "learning_rate": 9.793150684931505e-07, "log_odds_chosen": 0.2952716052532196, "log_odds_ratio": -0.7600985169410706, "logits/chosen": -0.8732432723045349, "logits/rejected": -0.8834478259086609, "logps/chosen": -2.830569267272949, "logps/rejected": -3.0884807109832764, "loss": 2.5959, "nll_loss": 2.5199174880981445, "rewards/accuracies": 0.5, "rewards/chosen": -0.28305694460868835, "rewards/margins": 0.02579113468527794, "rewards/rejected": -0.3088480830192566, "step": 151 }, { "epoch": 0.4161533196440794, "grad_norm": 3.932196617126465, "learning_rate": 9.791780821917807e-07, "log_odds_chosen": 0.060509711503982544, "log_odds_ratio": -0.9248225688934326, "logits/chosen": -1.1164331436157227, "logits/rejected": -1.1438719034194946, "logps/chosen": -3.4772109985351562, "logps/rejected": -3.5051825046539307, "loss": 2.5662, "nll_loss": 2.473701238632202, "rewards/accuracies": 0.5, "rewards/chosen": -0.3477210998535156, "rewards/margins": 0.0027971435338258743, "rewards/rejected": -0.35051822662353516, "step": 152 }, { "epoch": 0.4188911704312115, "grad_norm": 3.973670244216919, "learning_rate": 9.79041095890411e-07, "log_odds_chosen": -1.207170009613037, "log_odds_ratio": -1.8546093702316284, "logits/chosen": -1.0570242404937744, "logits/rejected": -1.0306055545806885, "logps/chosen": -3.903413772583008, "logps/rejected": -2.6826670169830322, "loss": 2.6408, "nll_loss": 2.455339193344116, "rewards/accuracies": 0.5, "rewards/chosen": -0.3903414011001587, "rewards/margins": -0.1220746785402298, "rewards/rejected": -0.2682667076587677, "step": 153 }, { "epoch": 0.4216290212183436, "grad_norm": 3.8454196453094482, "learning_rate": 9.78904109589041e-07, "log_odds_chosen": -0.01215614378452301, "log_odds_ratio": -1.1104350090026855, "logits/chosen": -0.9850133061408997, "logits/rejected": -1.1470788717269897, "logps/chosen": -3.161301612854004, "logps/rejected": -3.0830278396606445, "loss": 2.4401, "nll_loss": 2.329082489013672, "rewards/accuracies": 0.5, "rewards/chosen": -0.3161301612854004, "rewards/margins": -0.00782736949622631, "rewards/rejected": -0.30830276012420654, "step": 154 }, { "epoch": 0.4243668720054757, "grad_norm": 3.7128891944885254, "learning_rate": 9.787671232876711e-07, "log_odds_chosen": -0.18961814045906067, "log_odds_ratio": -0.899223804473877, "logits/chosen": -0.9826735258102417, "logits/rejected": -0.9682378768920898, "logps/chosen": -2.5949060916900635, "logps/rejected": -2.374180555343628, "loss": 2.451, "nll_loss": 2.3611059188842773, "rewards/accuracies": 0.625, "rewards/chosen": -0.25949060916900635, "rewards/margins": -0.02207256853580475, "rewards/rejected": -0.2374180555343628, "step": 155 }, { "epoch": 0.4271047227926078, "grad_norm": 3.8693950176239014, "learning_rate": 9.786301369863013e-07, "log_odds_chosen": -0.19175992906093597, "log_odds_ratio": -0.903197705745697, "logits/chosen": -0.92995285987854, "logits/rejected": -0.9590866565704346, "logps/chosen": -3.012057065963745, "logps/rejected": -2.79270601272583, "loss": 2.4997, "nll_loss": 2.4093847274780273, "rewards/accuracies": 0.5, "rewards/chosen": -0.30120569467544556, "rewards/margins": -0.021935097873210907, "rewards/rejected": -0.27927061915397644, "step": 156 }, { "epoch": 0.42984257357973993, "grad_norm": 4.153520584106445, "learning_rate": 9.784931506849313e-07, "log_odds_chosen": -1.4972946643829346, "log_odds_ratio": -1.9802322387695312, "logits/chosen": -1.1835787296295166, "logits/rejected": -1.0933505296707153, "logps/chosen": -4.106613636016846, "logps/rejected": -2.670647621154785, "loss": 2.7802, "nll_loss": 2.582205295562744, "rewards/accuracies": 0.375, "rewards/chosen": -0.41066136956214905, "rewards/margins": -0.1435966044664383, "rewards/rejected": -0.26706475019454956, "step": 157 }, { "epoch": 0.432580424366872, "grad_norm": 3.4427318572998047, "learning_rate": 9.783561643835615e-07, "log_odds_chosen": 0.18895134329795837, "log_odds_ratio": -0.9054528474807739, "logits/chosen": -1.0275702476501465, "logits/rejected": -1.0797913074493408, "logps/chosen": -2.3128650188446045, "logps/rejected": -2.4522454738616943, "loss": 2.3856, "nll_loss": 2.2950377464294434, "rewards/accuracies": 0.5, "rewards/chosen": -0.23128649592399597, "rewards/margins": 0.013938050717115402, "rewards/rejected": -0.24522455036640167, "step": 158 }, { "epoch": 0.4353182751540041, "grad_norm": 3.6886043548583984, "learning_rate": 9.782191780821918e-07, "log_odds_chosen": -0.4290751814842224, "log_odds_ratio": -1.0035531520843506, "logits/chosen": -1.0916599035263062, "logits/rejected": -1.1187849044799805, "logps/chosen": -2.8594682216644287, "logps/rejected": -2.4214940071105957, "loss": 2.4873, "nll_loss": 2.3869664669036865, "rewards/accuracies": 0.375, "rewards/chosen": -0.2859468460083008, "rewards/margins": -0.04379744082689285, "rewards/rejected": -0.24214938282966614, "step": 159 }, { "epoch": 0.4380561259411362, "grad_norm": 3.989356756210327, "learning_rate": 9.78082191780822e-07, "log_odds_chosen": 0.19707316160202026, "log_odds_ratio": -0.9319517016410828, "logits/chosen": -1.03517484664917, "logits/rejected": -1.0394715070724487, "logps/chosen": -3.0506467819213867, "logps/rejected": -3.2326741218566895, "loss": 2.5937, "nll_loss": 2.5004563331604004, "rewards/accuracies": 0.5, "rewards/chosen": -0.30506467819213867, "rewards/margins": 0.018202736973762512, "rewards/rejected": -0.32326740026474, "step": 160 }, { "epoch": 0.4407939767282683, "grad_norm": 3.746607542037964, "learning_rate": 9.77945205479452e-07, "log_odds_chosen": -0.11115157604217529, "log_odds_ratio": -1.0117393732070923, "logits/chosen": -1.0951058864593506, "logits/rejected": -1.088064193725586, "logps/chosen": -2.9312286376953125, "logps/rejected": -2.8293678760528564, "loss": 2.5349, "nll_loss": 2.4337079524993896, "rewards/accuracies": 0.5, "rewards/chosen": -0.29312288761138916, "rewards/margins": -0.010186074301600456, "rewards/rejected": -0.28293678164482117, "step": 161 }, { "epoch": 0.44353182751540043, "grad_norm": 3.8147292137145996, "learning_rate": 9.778082191780822e-07, "log_odds_chosen": -0.7139273881912231, "log_odds_ratio": -1.3191455602645874, "logits/chosen": -0.9754422903060913, "logits/rejected": -0.9473571181297302, "logps/chosen": -3.1299166679382324, "logps/rejected": -2.413895606994629, "loss": 2.5242, "nll_loss": 2.3922739028930664, "rewards/accuracies": 0.25, "rewards/chosen": -0.3129916787147522, "rewards/margins": -0.07160209119319916, "rewards/rejected": -0.24138957262039185, "step": 162 }, { "epoch": 0.4462696783025325, "grad_norm": 4.054307460784912, "learning_rate": 9.776712328767124e-07, "log_odds_chosen": -0.9954604506492615, "log_odds_ratio": -1.4879951477050781, "logits/chosen": -1.0905406475067139, "logits/rejected": -1.0789070129394531, "logps/chosen": -3.359630584716797, "logps/rejected": -2.378443956375122, "loss": 2.6344, "nll_loss": 2.485579013824463, "rewards/accuracies": 0.375, "rewards/chosen": -0.33596307039260864, "rewards/margins": -0.09811867028474808, "rewards/rejected": -0.23784439265727997, "step": 163 }, { "epoch": 0.4490075290896646, "grad_norm": 3.9101269245147705, "learning_rate": 9.775342465753424e-07, "log_odds_chosen": -0.01678529381752014, "log_odds_ratio": -0.7648042440414429, "logits/chosen": -1.0278620719909668, "logits/rejected": -1.0179500579833984, "logps/chosen": -3.1583051681518555, "logps/rejected": -3.1306819915771484, "loss": 2.529, "nll_loss": 2.452509880065918, "rewards/accuracies": 0.625, "rewards/chosen": -0.3158304989337921, "rewards/margins": -0.00276227667927742, "rewards/rejected": -0.3130682110786438, "step": 164 }, { "epoch": 0.4517453798767967, "grad_norm": 4.3098225593566895, "learning_rate": 9.773972602739726e-07, "log_odds_chosen": -0.4334731698036194, "log_odds_ratio": -1.1495437622070312, "logits/chosen": -1.1162850856781006, "logits/rejected": -1.0896682739257812, "logps/chosen": -3.5925638675689697, "logps/rejected": -3.163508176803589, "loss": 2.6294, "nll_loss": 2.5144567489624023, "rewards/accuracies": 0.375, "rewards/chosen": -0.359256386756897, "rewards/margins": -0.04290558397769928, "rewards/rejected": -0.3163508176803589, "step": 165 }, { "epoch": 0.4544832306639288, "grad_norm": 3.5871243476867676, "learning_rate": 9.772602739726028e-07, "log_odds_chosen": -0.04422682523727417, "log_odds_ratio": -0.8654580116271973, "logits/chosen": -0.8659054040908813, "logits/rejected": -0.917928159236908, "logps/chosen": -2.9845969676971436, "logps/rejected": -2.8893027305603027, "loss": 2.4494, "nll_loss": 2.3628454208374023, "rewards/accuracies": 0.625, "rewards/chosen": -0.2984597086906433, "rewards/margins": -0.009529422968626022, "rewards/rejected": -0.2889302968978882, "step": 166 }, { "epoch": 0.45722108145106094, "grad_norm": 4.116689205169678, "learning_rate": 9.77123287671233e-07, "log_odds_chosen": -0.624809741973877, "log_odds_ratio": -1.2059484720230103, "logits/chosen": -0.9472008943557739, "logits/rejected": -0.9392125606536865, "logps/chosen": -3.5149874687194824, "logps/rejected": -2.880444049835205, "loss": 2.536, "nll_loss": 2.415393352508545, "rewards/accuracies": 0.375, "rewards/chosen": -0.35149872303009033, "rewards/margins": -0.06345435976982117, "rewards/rejected": -0.28804439306259155, "step": 167 }, { "epoch": 0.459958932238193, "grad_norm": 3.853459119796753, "learning_rate": 9.76986301369863e-07, "log_odds_chosen": 0.0987338125705719, "log_odds_ratio": -0.7133047580718994, "logits/chosen": -1.079719066619873, "logits/rejected": -1.0576691627502441, "logps/chosen": -2.7317147254943848, "logps/rejected": -2.8037819862365723, "loss": 2.4518, "nll_loss": 2.380483388900757, "rewards/accuracies": 0.625, "rewards/chosen": -0.27317148447036743, "rewards/margins": 0.007206736132502556, "rewards/rejected": -0.28037822246551514, "step": 168 }, { "epoch": 0.46269678302532513, "grad_norm": 3.7311320304870605, "learning_rate": 9.768493150684932e-07, "log_odds_chosen": 0.13160595297813416, "log_odds_ratio": -0.8184800744056702, "logits/chosen": -0.9557075500488281, "logits/rejected": -1.059307336807251, "logps/chosen": -2.5507335662841797, "logps/rejected": -2.6367833614349365, "loss": 2.4082, "nll_loss": 2.3263823986053467, "rewards/accuracies": 0.5, "rewards/chosen": -0.25507333874702454, "rewards/margins": 0.008604992181062698, "rewards/rejected": -0.26367834210395813, "step": 169 }, { "epoch": 0.4654346338124572, "grad_norm": 3.8723607063293457, "learning_rate": 9.767123287671234e-07, "log_odds_chosen": -0.06133022904396057, "log_odds_ratio": -0.8897221088409424, "logits/chosen": -0.8661093711853027, "logits/rejected": -0.83216792345047, "logps/chosen": -2.746224880218506, "logps/rejected": -2.6057214736938477, "loss": 2.5098, "nll_loss": 2.420807123184204, "rewards/accuracies": 0.625, "rewards/chosen": -0.27462247014045715, "rewards/margins": -0.014050314202904701, "rewards/rejected": -0.2605721354484558, "step": 170 }, { "epoch": 0.4681724845995893, "grad_norm": 3.9200310707092285, "learning_rate": 9.765753424657534e-07, "log_odds_chosen": -1.0021110773086548, "log_odds_ratio": -1.6459003686904907, "logits/chosen": -1.0536288022994995, "logits/rejected": -1.116455078125, "logps/chosen": -3.8352200984954834, "logps/rejected": -2.8282172679901123, "loss": 2.5436, "nll_loss": 2.3790249824523926, "rewards/accuracies": 0.5, "rewards/chosen": -0.38352200388908386, "rewards/margins": -0.10070029646158218, "rewards/rejected": -0.2828217148780823, "step": 171 }, { "epoch": 0.47091033538672145, "grad_norm": 3.6511623859405518, "learning_rate": 9.764383561643836e-07, "log_odds_chosen": -0.46101754903793335, "log_odds_ratio": -1.027446985244751, "logits/chosen": -1.002457618713379, "logits/rejected": -0.9703820943832397, "logps/chosen": -2.7622122764587402, "logps/rejected": -2.3400535583496094, "loss": 2.404, "nll_loss": 2.301280975341797, "rewards/accuracies": 0.375, "rewards/chosen": -0.27622121572494507, "rewards/margins": -0.042215876281261444, "rewards/rejected": -0.23400536179542542, "step": 172 }, { "epoch": 0.4736481861738535, "grad_norm": 4.119174480438232, "learning_rate": 9.763013698630138e-07, "log_odds_chosen": 0.4139353632926941, "log_odds_ratio": -0.7137945890426636, "logits/chosen": -0.9318885803222656, "logits/rejected": -0.9666367769241333, "logps/chosen": -3.107789993286133, "logps/rejected": -3.4920148849487305, "loss": 2.4862, "nll_loss": 2.4148082733154297, "rewards/accuracies": 0.625, "rewards/chosen": -0.31077897548675537, "rewards/margins": 0.038422517478466034, "rewards/rejected": -0.349201500415802, "step": 173 }, { "epoch": 0.47638603696098564, "grad_norm": 4.588752746582031, "learning_rate": 9.761643835616438e-07, "log_odds_chosen": -2.0065317153930664, "log_odds_ratio": -2.3870718479156494, "logits/chosen": -1.0731213092803955, "logits/rejected": -1.0032347440719604, "logps/chosen": -4.730210781097412, "logps/rejected": -2.750746488571167, "loss": 2.7348, "nll_loss": 2.496068000793457, "rewards/accuracies": 0.125, "rewards/chosen": -0.47302109003067017, "rewards/margins": -0.1979464441537857, "rewards/rejected": -0.27507466077804565, "step": 174 }, { "epoch": 0.4791238877481177, "grad_norm": 3.749183416366577, "learning_rate": 9.76027397260274e-07, "log_odds_chosen": -0.3116331398487091, "log_odds_ratio": -0.9560946822166443, "logits/chosen": -1.016358494758606, "logits/rejected": -1.0101301670074463, "logps/chosen": -2.704728603363037, "logps/rejected": -2.3783602714538574, "loss": 2.4473, "nll_loss": 2.3517279624938965, "rewards/accuracies": 0.375, "rewards/chosen": -0.27047285437583923, "rewards/margins": -0.032636843621730804, "rewards/rejected": -0.23783600330352783, "step": 175 }, { "epoch": 0.48186173853524983, "grad_norm": 3.896527051925659, "learning_rate": 9.758904109589042e-07, "log_odds_chosen": 0.3506035804748535, "log_odds_ratio": -0.6331712007522583, "logits/chosen": -1.1071351766586304, "logits/rejected": -1.082668662071228, "logps/chosen": -2.560579776763916, "logps/rejected": -2.8729796409606934, "loss": 2.4961, "nll_loss": 2.432802200317383, "rewards/accuracies": 0.625, "rewards/chosen": -0.2560579776763916, "rewards/margins": 0.03123999759554863, "rewards/rejected": -0.28729796409606934, "step": 176 }, { "epoch": 0.48459958932238195, "grad_norm": 4.503257751464844, "learning_rate": 9.757534246575342e-07, "log_odds_chosen": 0.29666417837142944, "log_odds_ratio": -0.7252674102783203, "logits/chosen": -1.0024237632751465, "logits/rejected": -0.9786036014556885, "logps/chosen": -3.239272117614746, "logps/rejected": -3.5151400566101074, "loss": 2.6541, "nll_loss": 2.5815634727478027, "rewards/accuracies": 0.375, "rewards/chosen": -0.32392722368240356, "rewards/margins": 0.02758682146668434, "rewards/rejected": -0.3515140414237976, "step": 177 }, { "epoch": 0.487337440109514, "grad_norm": 5.260904312133789, "learning_rate": 9.756164383561644e-07, "log_odds_chosen": -1.5022540092468262, "log_odds_ratio": -1.9621281623840332, "logits/chosen": -1.1083505153656006, "logits/rejected": -1.1022474765777588, "logps/chosen": -5.2968621253967285, "logps/rejected": -3.812129020690918, "loss": 2.7127, "nll_loss": 2.516439914703369, "rewards/accuracies": 0.125, "rewards/chosen": -0.5296862125396729, "rewards/margins": -0.14847332239151, "rewards/rejected": -0.38121289014816284, "step": 178 }, { "epoch": 0.49007529089664614, "grad_norm": 3.5473177433013916, "learning_rate": 9.754794520547944e-07, "log_odds_chosen": -0.1835687756538391, "log_odds_ratio": -0.9418627619743347, "logits/chosen": -0.9946032762527466, "logits/rejected": -1.0168371200561523, "logps/chosen": -3.184192657470703, "logps/rejected": -2.961320638656616, "loss": 2.3579, "nll_loss": 2.263699531555176, "rewards/accuracies": 0.625, "rewards/chosen": -0.31841927766799927, "rewards/margins": -0.022287216037511826, "rewards/rejected": -0.29613205790519714, "step": 179 }, { "epoch": 0.4928131416837782, "grad_norm": 4.935855865478516, "learning_rate": 9.753424657534246e-07, "log_odds_chosen": -1.4426921606063843, "log_odds_ratio": -1.733705997467041, "logits/chosen": -0.9728791117668152, "logits/rejected": -0.8734503984451294, "logps/chosen": -4.358599662780762, "logps/rejected": -2.946125030517578, "loss": 2.8312, "nll_loss": 2.6578726768493652, "rewards/accuracies": 0.125, "rewards/chosen": -0.4358599781990051, "rewards/margins": -0.1412474364042282, "rewards/rejected": -0.29461249709129333, "step": 180 }, { "epoch": 0.49555099247091033, "grad_norm": 4.314568042755127, "learning_rate": 9.752054794520548e-07, "log_odds_chosen": -0.7953548431396484, "log_odds_ratio": -1.2990566492080688, "logits/chosen": -1.0923413038253784, "logits/rejected": -1.0223209857940674, "logps/chosen": -3.531126022338867, "logps/rejected": -2.739264965057373, "loss": 2.5936, "nll_loss": 2.4637038707733154, "rewards/accuracies": 0.375, "rewards/chosen": -0.3531126379966736, "rewards/margins": -0.07918613404035568, "rewards/rejected": -0.2739264965057373, "step": 181 }, { "epoch": 0.49828884325804246, "grad_norm": 3.588151216506958, "learning_rate": 9.750684931506848e-07, "log_odds_chosen": 1.1463922262191772, "log_odds_ratio": -0.5873667001724243, "logits/chosen": -1.0646668672561646, "logits/rejected": -1.1439580917358398, "logps/chosen": -2.6616897583007812, "logps/rejected": -3.753539562225342, "loss": 2.3891, "nll_loss": 2.3303709030151367, "rewards/accuracies": 0.75, "rewards/chosen": -0.2661689519882202, "rewards/margins": 0.10918502509593964, "rewards/rejected": -0.37535399198532104, "step": 182 }, { "epoch": 0.5010266940451745, "grad_norm": 4.091968059539795, "learning_rate": 9.74931506849315e-07, "log_odds_chosen": -0.2473304271697998, "log_odds_ratio": -0.8946778178215027, "logits/chosen": -1.01826810836792, "logits/rejected": -1.0193830728530884, "logps/chosen": -3.1832003593444824, "logps/rejected": -2.930553436279297, "loss": 2.5066, "nll_loss": 2.4171767234802246, "rewards/accuracies": 0.25, "rewards/chosen": -0.31832003593444824, "rewards/margins": -0.02526470459997654, "rewards/rejected": -0.29305535554885864, "step": 183 }, { "epoch": 0.5037645448323066, "grad_norm": 3.798990249633789, "learning_rate": 9.747945205479452e-07, "log_odds_chosen": -0.05884552001953125, "log_odds_ratio": -0.9496780037879944, "logits/chosen": -0.8530597686767578, "logits/rejected": -0.8795701265335083, "logps/chosen": -2.730297803878784, "logps/rejected": -2.6288323402404785, "loss": 2.4456, "nll_loss": 2.350646495819092, "rewards/accuracies": 0.5, "rewards/chosen": -0.27302980422973633, "rewards/margins": -0.01014653593301773, "rewards/rejected": -0.2628832459449768, "step": 184 }, { "epoch": 0.5065023956194388, "grad_norm": 3.6947782039642334, "learning_rate": 9.746575342465752e-07, "log_odds_chosen": 0.28221699595451355, "log_odds_ratio": -0.6905962228775024, "logits/chosen": -1.0284698009490967, "logits/rejected": -1.0868680477142334, "logps/chosen": -2.123769521713257, "logps/rejected": -2.333113670349121, "loss": 2.3473, "nll_loss": 2.278268337249756, "rewards/accuracies": 0.75, "rewards/chosen": -0.21237695217132568, "rewards/margins": 0.02093442901968956, "rewards/rejected": -0.23331138491630554, "step": 185 }, { "epoch": 0.5092402464065708, "grad_norm": 3.8833167552948, "learning_rate": 9.745205479452054e-07, "log_odds_chosen": -0.664156973361969, "log_odds_ratio": -1.1518914699554443, "logits/chosen": -0.9238696098327637, "logits/rejected": -0.9096887707710266, "logps/chosen": -3.1764814853668213, "logps/rejected": -2.5295748710632324, "loss": 2.4773, "nll_loss": 2.3621037006378174, "rewards/accuracies": 0.25, "rewards/chosen": -0.31764814257621765, "rewards/margins": -0.06469063460826874, "rewards/rejected": -0.2529575228691101, "step": 186 }, { "epoch": 0.5119780971937029, "grad_norm": 3.9611003398895264, "learning_rate": 9.743835616438356e-07, "log_odds_chosen": -0.09914115071296692, "log_odds_ratio": -0.87237149477005, "logits/chosen": -1.0958055257797241, "logits/rejected": -1.0860658884048462, "logps/chosen": -3.019540786743164, "logps/rejected": -2.9200596809387207, "loss": 2.4571, "nll_loss": 2.3698372840881348, "rewards/accuracies": 0.375, "rewards/chosen": -0.301954060792923, "rewards/margins": -0.009948091581463814, "rewards/rejected": -0.2920059561729431, "step": 187 }, { "epoch": 0.5147159479808351, "grad_norm": 4.01923131942749, "learning_rate": 9.742465753424656e-07, "log_odds_chosen": -0.39520883560180664, "log_odds_ratio": -1.2605853080749512, "logits/chosen": -0.9418148398399353, "logits/rejected": -1.0439902544021606, "logps/chosen": -3.3889293670654297, "logps/rejected": -2.9382095336914062, "loss": 2.4784, "nll_loss": 2.352308750152588, "rewards/accuracies": 0.5, "rewards/chosen": -0.33889293670654297, "rewards/margins": -0.045071978121995926, "rewards/rejected": -0.29382097721099854, "step": 188 }, { "epoch": 0.5174537987679672, "grad_norm": 3.5351648330688477, "learning_rate": 9.741095890410958e-07, "log_odds_chosen": 0.42051565647125244, "log_odds_ratio": -0.5564237236976624, "logits/chosen": -0.8665564060211182, "logits/rejected": -0.9201534986495972, "logps/chosen": -2.244086265563965, "logps/rejected": -2.561650276184082, "loss": 2.268, "nll_loss": 2.2123494148254395, "rewards/accuracies": 0.875, "rewards/chosen": -0.22440862655639648, "rewards/margins": 0.03175639361143112, "rewards/rejected": -0.2561650276184082, "step": 189 }, { "epoch": 0.5201916495550992, "grad_norm": 4.293445587158203, "learning_rate": 9.73972602739726e-07, "log_odds_chosen": 0.02253510057926178, "log_odds_ratio": -1.341723918914795, "logits/chosen": -1.034377932548523, "logits/rejected": -1.047147274017334, "logps/chosen": -3.3224520683288574, "logps/rejected": -3.342078447341919, "loss": 2.5408, "nll_loss": 2.406654119491577, "rewards/accuracies": 0.5, "rewards/chosen": -0.33224523067474365, "rewards/margins": 0.0019626114517450333, "rewards/rejected": -0.33420783281326294, "step": 190 }, { "epoch": 0.5229295003422314, "grad_norm": 4.2223381996154785, "learning_rate": 9.738356164383562e-07, "log_odds_chosen": -0.6821969151496887, "log_odds_ratio": -1.2772631645202637, "logits/chosen": -1.077286720275879, "logits/rejected": -1.0273078680038452, "logps/chosen": -3.2895164489746094, "logps/rejected": -2.6008479595184326, "loss": 2.6706, "nll_loss": 2.5429129600524902, "rewards/accuracies": 0.25, "rewards/chosen": -0.3289516568183899, "rewards/margins": -0.06886687874794006, "rewards/rejected": -0.26008477807044983, "step": 191 }, { "epoch": 0.5256673511293635, "grad_norm": 4.532113075256348, "learning_rate": 9.736986301369862e-07, "log_odds_chosen": -0.5565770268440247, "log_odds_ratio": -1.0577927827835083, "logits/chosen": -1.0261878967285156, "logits/rejected": -0.9809314012527466, "logps/chosen": -3.3714687824249268, "logps/rejected": -2.832068681716919, "loss": 2.6119, "nll_loss": 2.506166458129883, "rewards/accuracies": 0.25, "rewards/chosen": -0.3371468782424927, "rewards/margins": -0.05394001305103302, "rewards/rejected": -0.28320688009262085, "step": 192 }, { "epoch": 0.5284052019164955, "grad_norm": 4.559903144836426, "learning_rate": 9.735616438356164e-07, "log_odds_chosen": -1.150149941444397, "log_odds_ratio": -1.7341642379760742, "logits/chosen": -1.054524540901184, "logits/rejected": -1.008471965789795, "logps/chosen": -4.019407272338867, "logps/rejected": -2.8763585090637207, "loss": 2.6447, "nll_loss": 2.471290111541748, "rewards/accuracies": 0.375, "rewards/chosen": -0.4019407331943512, "rewards/margins": -0.11430487036705017, "rewards/rejected": -0.287635862827301, "step": 193 }, { "epoch": 0.5311430527036276, "grad_norm": 4.72752046585083, "learning_rate": 9.734246575342466e-07, "log_odds_chosen": -0.21686644852161407, "log_odds_ratio": -1.663879156112671, "logits/chosen": -1.0025882720947266, "logits/rejected": -1.0349336862564087, "logps/chosen": -4.7115864753723145, "logps/rejected": -4.442992210388184, "loss": 2.5969, "nll_loss": 2.4305238723754883, "rewards/accuracies": 0.25, "rewards/chosen": -0.47115862369537354, "rewards/margins": -0.02685944363474846, "rewards/rejected": -0.44429922103881836, "step": 194 }, { "epoch": 0.5338809034907598, "grad_norm": 3.891648292541504, "learning_rate": 9.732876712328766e-07, "log_odds_chosen": -0.11678029596805573, "log_odds_ratio": -1.0703227519989014, "logits/chosen": -0.9826977849006653, "logits/rejected": -1.0092852115631104, "logps/chosen": -3.2661046981811523, "logps/rejected": -3.12026309967041, "loss": 2.4222, "nll_loss": 2.3151497840881348, "rewards/accuracies": 0.625, "rewards/chosen": -0.3266104459762573, "rewards/margins": -0.014584150165319443, "rewards/rejected": -0.31202632188796997, "step": 195 }, { "epoch": 0.5366187542778919, "grad_norm": 4.304116725921631, "learning_rate": 9.731506849315068e-07, "log_odds_chosen": -0.3066619038581848, "log_odds_ratio": -1.5483427047729492, "logits/chosen": -1.0088958740234375, "logits/rejected": -0.9693753123283386, "logps/chosen": -3.4133143424987793, "logps/rejected": -3.1224846839904785, "loss": 2.5446, "nll_loss": 2.389749050140381, "rewards/accuracies": 0.375, "rewards/chosen": -0.341331422328949, "rewards/margins": -0.029082950204610825, "rewards/rejected": -0.31224846839904785, "step": 196 }, { "epoch": 0.5393566050650239, "grad_norm": 3.8327560424804688, "learning_rate": 9.73013698630137e-07, "log_odds_chosen": -0.29820477962493896, "log_odds_ratio": -0.9775595664978027, "logits/chosen": -1.0613758563995361, "logits/rejected": -1.1337169408798218, "logps/chosen": -3.2645277976989746, "logps/rejected": -2.9103028774261475, "loss": 2.4209, "nll_loss": 2.3231630325317383, "rewards/accuracies": 0.375, "rewards/chosen": -0.3264527916908264, "rewards/margins": -0.03542247414588928, "rewards/rejected": -0.29103028774261475, "step": 197 }, { "epoch": 0.5420944558521561, "grad_norm": 4.229416370391846, "learning_rate": 9.728767123287672e-07, "log_odds_chosen": 0.728168249130249, "log_odds_ratio": -0.5645437240600586, "logits/chosen": -1.0156749486923218, "logits/rejected": -1.0262094736099243, "logps/chosen": -2.845877170562744, "logps/rejected": -3.5301570892333984, "loss": 2.4844, "nll_loss": 2.427907943725586, "rewards/accuracies": 0.75, "rewards/chosen": -0.28458771109580994, "rewards/margins": 0.06842799484729767, "rewards/rejected": -0.3530157208442688, "step": 198 }, { "epoch": 0.5448323066392882, "grad_norm": 3.7319374084472656, "learning_rate": 9.727397260273972e-07, "log_odds_chosen": 0.5648605823516846, "log_odds_ratio": -0.6715899705886841, "logits/chosen": -1.004001498222351, "logits/rejected": -1.0147771835327148, "logps/chosen": -2.2974343299865723, "logps/rejected": -2.821608543395996, "loss": 2.3481, "nll_loss": 2.280902862548828, "rewards/accuracies": 0.5, "rewards/chosen": -0.22974345088005066, "rewards/margins": 0.05241738259792328, "rewards/rejected": -0.28216084837913513, "step": 199 }, { "epoch": 0.5475701574264202, "grad_norm": 3.9112017154693604, "learning_rate": 9.726027397260274e-07, "log_odds_chosen": 0.11911344528198242, "log_odds_ratio": -0.7076783180236816, "logits/chosen": -0.8821336627006531, "logits/rejected": -0.8992592692375183, "logps/chosen": -2.595640182495117, "logps/rejected": -2.6590209007263184, "loss": 2.3684, "nll_loss": 2.2976109981536865, "rewards/accuracies": 0.5, "rewards/chosen": -0.25956401228904724, "rewards/margins": 0.006338087841868401, "rewards/rejected": -0.2659021019935608, "step": 200 }, { "epoch": 0.5503080082135524, "grad_norm": 4.597007751464844, "learning_rate": 9.724657534246576e-07, "log_odds_chosen": -0.8817487955093384, "log_odds_ratio": -1.4242870807647705, "logits/chosen": -1.0671921968460083, "logits/rejected": -1.0047683715820312, "logps/chosen": -3.655637264251709, "logps/rejected": -2.798684597015381, "loss": 2.6155, "nll_loss": 2.473120927810669, "rewards/accuracies": 0.25, "rewards/chosen": -0.3655637502670288, "rewards/margins": -0.0856953039765358, "rewards/rejected": -0.2798684537410736, "step": 201 }, { "epoch": 0.5530458590006845, "grad_norm": 3.9316892623901367, "learning_rate": 9.723287671232876e-07, "log_odds_chosen": -0.42454320192337036, "log_odds_ratio": -1.256566047668457, "logits/chosen": -0.9922165870666504, "logits/rejected": -1.0375428199768066, "logps/chosen": -3.1410841941833496, "logps/rejected": -2.6458053588867188, "loss": 2.4215, "nll_loss": 2.29581618309021, "rewards/accuracies": 0.625, "rewards/chosen": -0.3141084313392639, "rewards/margins": -0.049527909606695175, "rewards/rejected": -0.26458051800727844, "step": 202 }, { "epoch": 0.5557837097878165, "grad_norm": 4.931307792663574, "learning_rate": 9.721917808219178e-07, "log_odds_chosen": -1.0663633346557617, "log_odds_ratio": -1.4591495990753174, "logits/chosen": -1.1272422075271606, "logits/rejected": -1.0548069477081299, "logps/chosen": -4.3278069496154785, "logps/rejected": -3.2903337478637695, "loss": 2.742, "nll_loss": 2.5960917472839355, "rewards/accuracies": 0.125, "rewards/chosen": -0.4327806830406189, "rewards/margins": -0.10374732315540314, "rewards/rejected": -0.32903337478637695, "step": 203 }, { "epoch": 0.5585215605749486, "grad_norm": 5.051851749420166, "learning_rate": 9.72054794520548e-07, "log_odds_chosen": -1.681997299194336, "log_odds_ratio": -1.9863660335540771, "logits/chosen": -1.0043067932128906, "logits/rejected": -0.9856469035148621, "logps/chosen": -4.7637434005737305, "logps/rejected": -3.10724139213562, "loss": 2.6995, "nll_loss": 2.5008749961853027, "rewards/accuracies": 0.125, "rewards/chosen": -0.47637438774108887, "rewards/margins": -0.16565020382404327, "rewards/rejected": -0.310724139213562, "step": 204 }, { "epoch": 0.5612594113620808, "grad_norm": 3.5703306198120117, "learning_rate": 9.71917808219178e-07, "log_odds_chosen": -0.14556244015693665, "log_odds_ratio": -0.9427212476730347, "logits/chosen": -1.0103267431259155, "logits/rejected": -1.079888939857483, "logps/chosen": -2.608201742172241, "logps/rejected": -2.4091453552246094, "loss": 2.3078, "nll_loss": 2.2135589122772217, "rewards/accuracies": 0.75, "rewards/chosen": -0.2608201503753662, "rewards/margins": -0.019905617460608482, "rewards/rejected": -0.24091455340385437, "step": 205 }, { "epoch": 0.5639972621492129, "grad_norm": 4.50073766708374, "learning_rate": 9.717808219178082e-07, "log_odds_chosen": -1.7220306396484375, "log_odds_ratio": -2.121640205383301, "logits/chosen": -1.0898494720458984, "logits/rejected": -1.0934927463531494, "logps/chosen": -4.2650251388549805, "logps/rejected": -2.61354923248291, "loss": 2.7185, "nll_loss": 2.5063090324401855, "rewards/accuracies": 0.375, "rewards/chosen": -0.426502525806427, "rewards/margins": -0.165147602558136, "rewards/rejected": -0.261354923248291, "step": 206 }, { "epoch": 0.5667351129363449, "grad_norm": 4.094022750854492, "learning_rate": 9.716438356164384e-07, "log_odds_chosen": 0.22063961625099182, "log_odds_ratio": -0.8533145785331726, "logits/chosen": -0.8655380606651306, "logits/rejected": -0.9460633397102356, "logps/chosen": -2.753047466278076, "logps/rejected": -2.9618000984191895, "loss": 2.4031, "nll_loss": 2.3178136348724365, "rewards/accuracies": 0.375, "rewards/chosen": -0.27530473470687866, "rewards/margins": 0.020875271409749985, "rewards/rejected": -0.29618000984191895, "step": 207 }, { "epoch": 0.5694729637234771, "grad_norm": 4.7556891441345215, "learning_rate": 9.715068493150684e-07, "log_odds_chosen": -0.31784993410110474, "log_odds_ratio": -0.9760192036628723, "logits/chosen": -1.0222461223602295, "logits/rejected": -0.9413294196128845, "logps/chosen": -3.2423136234283447, "logps/rejected": -2.8952794075012207, "loss": 2.5973, "nll_loss": 2.4997360706329346, "rewards/accuracies": 0.375, "rewards/chosen": -0.3242313861846924, "rewards/margins": -0.03470342606306076, "rewards/rejected": -0.289527952671051, "step": 208 }, { "epoch": 0.5722108145106092, "grad_norm": 3.745621919631958, "learning_rate": 9.713698630136986e-07, "log_odds_chosen": 0.20146936178207397, "log_odds_ratio": -0.7544434666633606, "logits/chosen": -0.9262546300888062, "logits/rejected": -1.0295175313949585, "logps/chosen": -2.255763053894043, "logps/rejected": -2.3510169982910156, "loss": 2.2552, "nll_loss": 2.1797523498535156, "rewards/accuracies": 0.625, "rewards/chosen": -0.22557632625102997, "rewards/margins": 0.009525380097329617, "rewards/rejected": -0.23510169982910156, "step": 209 }, { "epoch": 0.5749486652977412, "grad_norm": 4.343352317810059, "learning_rate": 9.712328767123286e-07, "log_odds_chosen": -1.0265356302261353, "log_odds_ratio": -1.576116681098938, "logits/chosen": -1.0518614053726196, "logits/rejected": -1.0573688745498657, "logps/chosen": -4.322563648223877, "logps/rejected": -3.2728519439697266, "loss": 2.5519, "nll_loss": 2.394321918487549, "rewards/accuracies": 0.25, "rewards/chosen": -0.43225640058517456, "rewards/margins": -0.10497116297483444, "rewards/rejected": -0.3272852301597595, "step": 210 }, { "epoch": 0.5776865160848734, "grad_norm": 4.2317609786987305, "learning_rate": 9.710958904109588e-07, "log_odds_chosen": -1.2606455087661743, "log_odds_ratio": -1.7562036514282227, "logits/chosen": -1.0666738748550415, "logits/rejected": -1.0389705896377563, "logps/chosen": -4.378828048706055, "logps/rejected": -3.1213390827178955, "loss": 2.6138, "nll_loss": 2.4381511211395264, "rewards/accuracies": 0.375, "rewards/chosen": -0.43788281083106995, "rewards/margins": -0.1257488876581192, "rewards/rejected": -0.31213390827178955, "step": 211 }, { "epoch": 0.5804243668720055, "grad_norm": 4.4866485595703125, "learning_rate": 9.70958904109589e-07, "log_odds_chosen": -1.270268201828003, "log_odds_ratio": -1.6423122882843018, "logits/chosen": -1.0133373737335205, "logits/rejected": -0.9856590032577515, "logps/chosen": -4.568937301635742, "logps/rejected": -3.3253045082092285, "loss": 2.671, "nll_loss": 2.506727695465088, "rewards/accuracies": 0.25, "rewards/chosen": -0.45689377188682556, "rewards/margins": -0.12436331808567047, "rewards/rejected": -0.3325304687023163, "step": 212 }, { "epoch": 0.5831622176591376, "grad_norm": 3.9728498458862305, "learning_rate": 9.70821917808219e-07, "log_odds_chosen": -0.16017401218414307, "log_odds_ratio": -1.046644687652588, "logits/chosen": -0.9583849906921387, "logits/rejected": -0.9524524211883545, "logps/chosen": -3.5093069076538086, "logps/rejected": -3.244563102722168, "loss": 2.3919, "nll_loss": 2.2872540950775146, "rewards/accuracies": 0.5, "rewards/chosen": -0.35093069076538086, "rewards/margins": -0.026474405080080032, "rewards/rejected": -0.3244563043117523, "step": 213 }, { "epoch": 0.5859000684462696, "grad_norm": 4.285681247711182, "learning_rate": 9.706849315068492e-07, "log_odds_chosen": -0.5343132615089417, "log_odds_ratio": -1.1885323524475098, "logits/chosen": -1.0192713737487793, "logits/rejected": -1.0104490518569946, "logps/chosen": -3.1196327209472656, "logps/rejected": -2.616828203201294, "loss": 2.5065, "nll_loss": 2.3876185417175293, "rewards/accuracies": 0.25, "rewards/chosen": -0.3119632601737976, "rewards/margins": -0.050280436873435974, "rewards/rejected": -0.26168280839920044, "step": 214 }, { "epoch": 0.5886379192334018, "grad_norm": 4.1310715675354, "learning_rate": 9.705479452054794e-07, "log_odds_chosen": 0.3819190561771393, "log_odds_ratio": -0.7882270812988281, "logits/chosen": -0.8445309400558472, "logits/rejected": -0.9002002477645874, "logps/chosen": -3.1020383834838867, "logps/rejected": -3.45573353767395, "loss": 2.4111, "nll_loss": 2.3322689533233643, "rewards/accuracies": 0.5, "rewards/chosen": -0.3102038502693176, "rewards/margins": 0.03536953032016754, "rewards/rejected": -0.345573365688324, "step": 215 }, { "epoch": 0.5913757700205339, "grad_norm": 3.848668098449707, "learning_rate": 9.704109589041094e-07, "log_odds_chosen": -1.1204711198806763, "log_odds_ratio": -1.6549917459487915, "logits/chosen": -1.0062270164489746, "logits/rejected": -1.0494340658187866, "logps/chosen": -3.560786247253418, "logps/rejected": -2.471292495727539, "loss": 2.4893, "nll_loss": 2.3238325119018555, "rewards/accuracies": 0.375, "rewards/chosen": -0.3560786843299866, "rewards/margins": -0.1089494377374649, "rewards/rejected": -0.24712924659252167, "step": 216 }, { "epoch": 0.5941136208076659, "grad_norm": 4.000442981719971, "learning_rate": 9.702739726027396e-07, "log_odds_chosen": 0.036565423011779785, "log_odds_ratio": -0.9924651980400085, "logits/chosen": -1.0387009382247925, "logits/rejected": -1.0612742900848389, "logps/chosen": -2.9406542778015137, "logps/rejected": -2.963644504547119, "loss": 2.4278, "nll_loss": 2.3285226821899414, "rewards/accuracies": 0.375, "rewards/chosen": -0.2940654456615448, "rewards/margins": 0.002299010753631592, "rewards/rejected": -0.2963644564151764, "step": 217 }, { "epoch": 0.5968514715947981, "grad_norm": 3.6667912006378174, "learning_rate": 9.701369863013698e-07, "log_odds_chosen": -0.2950342297554016, "log_odds_ratio": -1.0761315822601318, "logits/chosen": -0.8151571154594421, "logits/rejected": -0.8572335243225098, "logps/chosen": -2.768195152282715, "logps/rejected": -2.418398380279541, "loss": 2.3047, "nll_loss": 2.1971182823181152, "rewards/accuracies": 0.375, "rewards/chosen": -0.27681946754455566, "rewards/margins": -0.034979648888111115, "rewards/rejected": -0.24183985590934753, "step": 218 }, { "epoch": 0.5995893223819302, "grad_norm": 3.9219696521759033, "learning_rate": 9.7e-07, "log_odds_chosen": -0.7633076906204224, "log_odds_ratio": -1.2408452033996582, "logits/chosen": -0.9627888202667236, "logits/rejected": -0.9366324543952942, "logps/chosen": -3.2229080200195312, "logps/rejected": -2.4742507934570312, "loss": 2.4899, "nll_loss": 2.365856647491455, "rewards/accuracies": 0.25, "rewards/chosen": -0.3222908079624176, "rewards/margins": -0.07486572861671448, "rewards/rejected": -0.24742507934570312, "step": 219 }, { "epoch": 0.6023271731690623, "grad_norm": 3.5060009956359863, "learning_rate": 9.6986301369863e-07, "log_odds_chosen": 0.2942540645599365, "log_odds_ratio": -0.7475119829177856, "logits/chosen": -0.9150263071060181, "logits/rejected": -0.9921360015869141, "logps/chosen": -2.3902218341827393, "logps/rejected": -2.631092071533203, "loss": 2.4113, "nll_loss": 2.336535692214966, "rewards/accuracies": 0.5, "rewards/chosen": -0.23902219533920288, "rewards/margins": 0.024087032303214073, "rewards/rejected": -0.2631092369556427, "step": 220 }, { "epoch": 0.6050650239561944, "grad_norm": 3.9749324321746826, "learning_rate": 9.697260273972602e-07, "log_odds_chosen": -1.1054129600524902, "log_odds_ratio": -1.554943561553955, "logits/chosen": -1.0961570739746094, "logits/rejected": -1.0910487174987793, "logps/chosen": -3.156829833984375, "logps/rejected": -2.1156764030456543, "loss": 2.4243, "nll_loss": 2.2688419818878174, "rewards/accuracies": 0.25, "rewards/chosen": -0.3156830072402954, "rewards/margins": -0.10411536693572998, "rewards/rejected": -0.21156764030456543, "step": 221 }, { "epoch": 0.6078028747433265, "grad_norm": 4.013543605804443, "learning_rate": 9.695890410958904e-07, "log_odds_chosen": -0.43920978903770447, "log_odds_ratio": -1.4520313739776611, "logits/chosen": -1.0022125244140625, "logits/rejected": -1.130903959274292, "logps/chosen": -3.3749136924743652, "logps/rejected": -2.9058046340942383, "loss": 2.4644, "nll_loss": 2.319221258163452, "rewards/accuracies": 0.5, "rewards/chosen": -0.33749139308929443, "rewards/margins": -0.04691090062260628, "rewards/rejected": -0.2905804514884949, "step": 222 }, { "epoch": 0.6105407255304586, "grad_norm": 4.676258087158203, "learning_rate": 9.694520547945204e-07, "log_odds_chosen": -1.363329529762268, "log_odds_ratio": -1.8298991918563843, "logits/chosen": -1.0312271118164062, "logits/rejected": -1.014345645904541, "logps/chosen": -4.076310157775879, "logps/rejected": -2.730635404586792, "loss": 2.6399, "nll_loss": 2.456949234008789, "rewards/accuracies": 0.25, "rewards/chosen": -0.4076310396194458, "rewards/margins": -0.1345674693584442, "rewards/rejected": -0.2730635404586792, "step": 223 }, { "epoch": 0.6132785763175906, "grad_norm": 4.233703136444092, "learning_rate": 9.693150684931506e-07, "log_odds_chosen": -0.061972618103027344, "log_odds_ratio": -0.9869142770767212, "logits/chosen": -0.908562183380127, "logits/rejected": -0.962317168712616, "logps/chosen": -3.3300440311431885, "logps/rejected": -3.2028989791870117, "loss": 2.476, "nll_loss": 2.377354621887207, "rewards/accuracies": 0.625, "rewards/chosen": -0.3330044150352478, "rewards/margins": -0.012714514508843422, "rewards/rejected": -0.3202899098396301, "step": 224 }, { "epoch": 0.6160164271047228, "grad_norm": 3.374083995819092, "learning_rate": 9.691780821917808e-07, "log_odds_chosen": 0.4563578963279724, "log_odds_ratio": -0.5695846676826477, "logits/chosen": -0.9383721351623535, "logits/rejected": -1.0310462713241577, "logps/chosen": -2.1637766361236572, "logps/rejected": -2.556354284286499, "loss": 2.1965, "nll_loss": 2.1395816802978516, "rewards/accuracies": 0.875, "rewards/chosen": -0.21637767553329468, "rewards/margins": 0.039257775992155075, "rewards/rejected": -0.25563544034957886, "step": 225 }, { "epoch": 0.6187542778918549, "grad_norm": 3.6778807640075684, "learning_rate": 9.69041095890411e-07, "log_odds_chosen": -0.6323601007461548, "log_odds_ratio": -1.221813678741455, "logits/chosen": -1.1148056983947754, "logits/rejected": -1.1202964782714844, "logps/chosen": -3.121493101119995, "logps/rejected": -2.497840166091919, "loss": 2.3488, "nll_loss": 2.2266294956207275, "rewards/accuracies": 0.5, "rewards/chosen": -0.312149316072464, "rewards/margins": -0.06236531585454941, "rewards/rejected": -0.24978402256965637, "step": 226 }, { "epoch": 0.621492128678987, "grad_norm": 4.199991703033447, "learning_rate": 9.68904109589041e-07, "log_odds_chosen": -0.040699273347854614, "log_odds_ratio": -0.8503872156143188, "logits/chosen": -0.9332480430603027, "logits/rejected": -0.9063323736190796, "logps/chosen": -3.057600498199463, "logps/rejected": -3.02643084526062, "loss": 2.3814, "nll_loss": 2.2963812351226807, "rewards/accuracies": 0.5, "rewards/chosen": -0.30576008558273315, "rewards/margins": -0.0031169727444648743, "rewards/rejected": -0.3026431202888489, "step": 227 }, { "epoch": 0.6242299794661191, "grad_norm": 3.834066152572632, "learning_rate": 9.687671232876712e-07, "log_odds_chosen": -0.2807025909423828, "log_odds_ratio": -1.1530029773712158, "logits/chosen": -1.0564053058624268, "logits/rejected": -1.0589449405670166, "logps/chosen": -2.8794260025024414, "logps/rejected": -2.5508785247802734, "loss": 2.3964, "nll_loss": 2.2810792922973633, "rewards/accuracies": 0.5, "rewards/chosen": -0.2879425883293152, "rewards/margins": -0.03285474330186844, "rewards/rejected": -0.25508785247802734, "step": 228 }, { "epoch": 0.6269678302532512, "grad_norm": 4.31780481338501, "learning_rate": 9.686301369863014e-07, "log_odds_chosen": -0.8291745781898499, "log_odds_ratio": -1.4044253826141357, "logits/chosen": -0.9595439434051514, "logits/rejected": -0.9351110458374023, "logps/chosen": -3.8305022716522217, "logps/rejected": -3.004244804382324, "loss": 2.5221, "nll_loss": 2.3816986083984375, "rewards/accuracies": 0.375, "rewards/chosen": -0.38305020332336426, "rewards/margins": -0.08262572437524796, "rewards/rejected": -0.3004245162010193, "step": 229 }, { "epoch": 0.6297056810403833, "grad_norm": 4.318235874176025, "learning_rate": 9.684931506849314e-07, "log_odds_chosen": -0.6298345327377319, "log_odds_ratio": -1.35149085521698, "logits/chosen": -0.9262927770614624, "logits/rejected": -0.9213173389434814, "logps/chosen": -3.686501979827881, "logps/rejected": -3.0053086280822754, "loss": 2.4364, "nll_loss": 2.301243782043457, "rewards/accuracies": 0.5, "rewards/chosen": -0.3686501979827881, "rewards/margins": -0.06811932474374771, "rewards/rejected": -0.30053091049194336, "step": 230 }, { "epoch": 0.6324435318275154, "grad_norm": 4.1731953620910645, "learning_rate": 9.683561643835616e-07, "log_odds_chosen": 0.5728000402450562, "log_odds_ratio": -0.9288334846496582, "logits/chosen": -0.9096280336380005, "logits/rejected": -0.9558812975883484, "logps/chosen": -2.790767192840576, "logps/rejected": -3.3480634689331055, "loss": 2.3935, "nll_loss": 2.300657272338867, "rewards/accuracies": 0.5, "rewards/chosen": -0.2790767252445221, "rewards/margins": 0.055729612708091736, "rewards/rejected": -0.33480632305145264, "step": 231 }, { "epoch": 0.6351813826146475, "grad_norm": 4.868379592895508, "learning_rate": 9.682191780821918e-07, "log_odds_chosen": -1.0285420417785645, "log_odds_ratio": -1.6274021863937378, "logits/chosen": -0.9847490787506104, "logits/rejected": -0.9709699749946594, "logps/chosen": -4.546965599060059, "logps/rejected": -3.46048903465271, "loss": 2.5588, "nll_loss": 2.3960347175598145, "rewards/accuracies": 0.25, "rewards/chosen": -0.4546965956687927, "rewards/margins": -0.1086476743221283, "rewards/rejected": -0.3460489511489868, "step": 232 }, { "epoch": 0.6379192334017796, "grad_norm": 4.27667760848999, "learning_rate": 9.680821917808218e-07, "log_odds_chosen": -0.5551021695137024, "log_odds_ratio": -1.0968503952026367, "logits/chosen": -0.9317228198051453, "logits/rejected": -0.9101227521896362, "logps/chosen": -3.1884560585021973, "logps/rejected": -2.627218008041382, "loss": 2.4683, "nll_loss": 2.3586225509643555, "rewards/accuracies": 0.375, "rewards/chosen": -0.31884559988975525, "rewards/margins": -0.056123822927474976, "rewards/rejected": -0.2627217769622803, "step": 233 }, { "epoch": 0.6406570841889117, "grad_norm": 4.07570219039917, "learning_rate": 9.67945205479452e-07, "log_odds_chosen": -0.3105655014514923, "log_odds_ratio": -1.0147334337234497, "logits/chosen": -0.9528865814208984, "logits/rejected": -0.9749501943588257, "logps/chosen": -2.692866802215576, "logps/rejected": -2.379256248474121, "loss": 2.4426, "nll_loss": 2.3411505222320557, "rewards/accuracies": 0.5, "rewards/chosen": -0.2692866623401642, "rewards/margins": -0.03136105835437775, "rewards/rejected": -0.23792561888694763, "step": 234 }, { "epoch": 0.6433949349760438, "grad_norm": 4.30022668838501, "learning_rate": 9.678082191780823e-07, "log_odds_chosen": -1.0366877317428589, "log_odds_ratio": -1.5941513776779175, "logits/chosen": -0.9967976808547974, "logits/rejected": -0.9657137393951416, "logps/chosen": -3.6661393642425537, "logps/rejected": -2.636957883834839, "loss": 2.5747, "nll_loss": 2.41532301902771, "rewards/accuracies": 0.375, "rewards/chosen": -0.3666139543056488, "rewards/margins": -0.10291816294193268, "rewards/rejected": -0.26369577646255493, "step": 235 }, { "epoch": 0.6461327857631759, "grad_norm": 4.321117401123047, "learning_rate": 9.676712328767122e-07, "log_odds_chosen": -1.0741591453552246, "log_odds_ratio": -1.6001557111740112, "logits/chosen": -1.0003738403320312, "logits/rejected": -0.9842618703842163, "logps/chosen": -3.8238306045532227, "logps/rejected": -2.7307722568511963, "loss": 2.5053, "nll_loss": 2.3452370166778564, "rewards/accuracies": 0.375, "rewards/chosen": -0.3823830485343933, "rewards/margins": -0.10930582135915756, "rewards/rejected": -0.27307724952697754, "step": 236 }, { "epoch": 0.648870636550308, "grad_norm": 4.093625068664551, "learning_rate": 9.675342465753424e-07, "log_odds_chosen": -0.3987206816673279, "log_odds_ratio": -1.214679479598999, "logits/chosen": -1.0868710279464722, "logits/rejected": -1.1236562728881836, "logps/chosen": -3.3267555236816406, "logps/rejected": -2.9025492668151855, "loss": 2.4422, "nll_loss": 2.3207404613494873, "rewards/accuracies": 0.625, "rewards/chosen": -0.332675576210022, "rewards/margins": -0.04242062568664551, "rewards/rejected": -0.2902549207210541, "step": 237 }, { "epoch": 0.6516084873374401, "grad_norm": 4.294052600860596, "learning_rate": 9.673972602739724e-07, "log_odds_chosen": -0.1746833324432373, "log_odds_ratio": -0.8873229026794434, "logits/chosen": -0.9749643802642822, "logits/rejected": -0.903904914855957, "logps/chosen": -3.2231390476226807, "logps/rejected": -3.052039861679077, "loss": 2.5021, "nll_loss": 2.413332462310791, "rewards/accuracies": 0.5, "rewards/chosen": -0.32231390476226807, "rewards/margins": -0.01710992306470871, "rewards/rejected": -0.30520397424697876, "step": 238 }, { "epoch": 0.6543463381245722, "grad_norm": 4.0642218589782715, "learning_rate": 9.672602739726026e-07, "log_odds_chosen": -0.5641672611236572, "log_odds_ratio": -1.246581792831421, "logits/chosen": -1.0149550437927246, "logits/rejected": -1.107174038887024, "logps/chosen": -3.2968668937683105, "logps/rejected": -2.693507432937622, "loss": 2.4298, "nll_loss": 2.3051836490631104, "rewards/accuracies": 0.375, "rewards/chosen": -0.32968670129776, "rewards/margins": -0.060335978865623474, "rewards/rejected": -0.26935070753097534, "step": 239 }, { "epoch": 0.6570841889117043, "grad_norm": 3.2201805114746094, "learning_rate": 9.671232876712329e-07, "log_odds_chosen": 0.1500948816537857, "log_odds_ratio": -0.7290389537811279, "logits/chosen": -0.8305591344833374, "logits/rejected": -0.9294684529304504, "logps/chosen": -2.2977278232574463, "logps/rejected": -2.3864212036132812, "loss": 2.2276, "nll_loss": 2.1546740531921387, "rewards/accuracies": 0.625, "rewards/chosen": -0.22977277636528015, "rewards/margins": 0.008869349956512451, "rewards/rejected": -0.2386421412229538, "step": 240 }, { "epoch": 0.6598220396988365, "grad_norm": 4.0155029296875, "learning_rate": 9.669863013698628e-07, "log_odds_chosen": -1.2940527200698853, "log_odds_ratio": -1.75630784034729, "logits/chosen": -0.993314266204834, "logits/rejected": -0.9482574462890625, "logps/chosen": -3.3137779235839844, "logps/rejected": -2.0605220794677734, "loss": 2.4825, "nll_loss": 2.3069136142730713, "rewards/accuracies": 0.375, "rewards/chosen": -0.3313778042793274, "rewards/margins": -0.12532557547092438, "rewards/rejected": -0.20605221390724182, "step": 241 }, { "epoch": 0.6625598904859685, "grad_norm": 4.08660888671875, "learning_rate": 9.66849315068493e-07, "log_odds_chosen": 0.0665028989315033, "log_odds_ratio": -1.095001220703125, "logits/chosen": -1.0780324935913086, "logits/rejected": -1.030825138092041, "logps/chosen": -3.4952516555786133, "logps/rejected": -3.531975030899048, "loss": 2.3998, "nll_loss": 2.290332555770874, "rewards/accuracies": 0.5, "rewards/chosen": -0.3495251536369324, "rewards/margins": 0.003672342747449875, "rewards/rejected": -0.35319751501083374, "step": 242 }, { "epoch": 0.6652977412731006, "grad_norm": 4.73845100402832, "learning_rate": 9.667123287671233e-07, "log_odds_chosen": -0.7787599563598633, "log_odds_ratio": -1.3014451265335083, "logits/chosen": -1.0552256107330322, "logits/rejected": -1.0826575756072998, "logps/chosen": -3.956707000732422, "logps/rejected": -3.1523797512054443, "loss": 2.5053, "nll_loss": 2.375178098678589, "rewards/accuracies": 0.375, "rewards/chosen": -0.39567071199417114, "rewards/margins": -0.08043275028467178, "rewards/rejected": -0.31523796916007996, "step": 243 }, { "epoch": 0.6680355920602327, "grad_norm": 4.190962314605713, "learning_rate": 9.665753424657532e-07, "log_odds_chosen": -0.6035221219062805, "log_odds_ratio": -1.2573113441467285, "logits/chosen": -1.020910620689392, "logits/rejected": -1.038649320602417, "logps/chosen": -3.3590474128723145, "logps/rejected": -2.7322394847869873, "loss": 2.3984, "nll_loss": 2.27264404296875, "rewards/accuracies": 0.5, "rewards/chosen": -0.3359047472476959, "rewards/margins": -0.06268079578876495, "rewards/rejected": -0.2732239365577698, "step": 244 }, { "epoch": 0.6707734428473648, "grad_norm": 3.4989686012268066, "learning_rate": 9.664383561643835e-07, "log_odds_chosen": -0.21129855513572693, "log_odds_ratio": -0.9575204253196716, "logits/chosen": -0.9059820175170898, "logits/rejected": -0.8884698748588562, "logps/chosen": -2.472559690475464, "logps/rejected": -2.235328435897827, "loss": 2.3659, "nll_loss": 2.2701756954193115, "rewards/accuracies": 0.5, "rewards/chosen": -0.24725596606731415, "rewards/margins": -0.02372313104569912, "rewards/rejected": -0.22353285551071167, "step": 245 }, { "epoch": 0.6735112936344969, "grad_norm": 3.735572338104248, "learning_rate": 9.663013698630137e-07, "log_odds_chosen": 0.08653882145881653, "log_odds_ratio": -0.8294547200202942, "logits/chosen": -1.039703607559204, "logits/rejected": -1.0718953609466553, "logps/chosen": -2.6585192680358887, "logps/rejected": -2.702073574066162, "loss": 2.2909, "nll_loss": 2.2079925537109375, "rewards/accuracies": 0.5, "rewards/chosen": -0.2658519148826599, "rewards/margins": 0.0043554529547691345, "rewards/rejected": -0.27020737528800964, "step": 246 }, { "epoch": 0.676249144421629, "grad_norm": 4.182383060455322, "learning_rate": 9.661643835616437e-07, "log_odds_chosen": -1.9086259603500366, "log_odds_ratio": -2.235088586807251, "logits/chosen": -1.0133917331695557, "logits/rejected": -1.0631120204925537, "logps/chosen": -3.9785337448120117, "logps/rejected": -2.125948190689087, "loss": 2.4925, "nll_loss": 2.268954277038574, "rewards/accuracies": 0.25, "rewards/chosen": -0.39785340428352356, "rewards/margins": -0.18525856733322144, "rewards/rejected": -0.21259482204914093, "step": 247 }, { "epoch": 0.6789869952087612, "grad_norm": 3.7068819999694824, "learning_rate": 9.660273972602739e-07, "log_odds_chosen": -0.7297694683074951, "log_odds_ratio": -1.3391586542129517, "logits/chosen": -1.0213006734848022, "logits/rejected": -1.0185774564743042, "logps/chosen": -2.6673169136047363, "logps/rejected": -1.9736275672912598, "loss": 2.4289, "nll_loss": 2.294996976852417, "rewards/accuracies": 0.5, "rewards/chosen": -0.2667316794395447, "rewards/margins": -0.06936892867088318, "rewards/rejected": -0.1973627507686615, "step": 248 }, { "epoch": 0.6817248459958932, "grad_norm": 3.811455488204956, "learning_rate": 9.65890410958904e-07, "log_odds_chosen": -0.10113978385925293, "log_odds_ratio": -0.861381471157074, "logits/chosen": -0.9665676355361938, "logits/rejected": -0.9620635509490967, "logps/chosen": -2.958935260772705, "logps/rejected": -2.841553211212158, "loss": 2.3263, "nll_loss": 2.2401437759399414, "rewards/accuracies": 0.5, "rewards/chosen": -0.29589352011680603, "rewards/margins": -0.01173819787800312, "rewards/rejected": -0.28415533900260925, "step": 249 }, { "epoch": 0.6844626967830253, "grad_norm": 3.004801034927368, "learning_rate": 9.657534246575343e-07, "log_odds_chosen": 0.7784677147865295, "log_odds_ratio": -0.4183061718940735, "logits/chosen": -0.7214312553405762, "logits/rejected": -0.7889113426208496, "logps/chosen": -1.4143115282058716, "logps/rejected": -2.064990758895874, "loss": 2.1068, "nll_loss": 2.0650060176849365, "rewards/accuracies": 1.0, "rewards/chosen": -0.14143115282058716, "rewards/margins": 0.06506792455911636, "rewards/rejected": -0.20649906992912292, "step": 250 }, { "epoch": 0.6872005475701575, "grad_norm": 3.7010600566864014, "learning_rate": 9.656164383561643e-07, "log_odds_chosen": 0.3649912178516388, "log_odds_ratio": -0.617479681968689, "logits/chosen": -1.036313772201538, "logits/rejected": -1.0531359910964966, "logps/chosen": -2.495422840118408, "logps/rejected": -2.787935733795166, "loss": 2.2407, "nll_loss": 2.178985118865967, "rewards/accuracies": 0.75, "rewards/chosen": -0.2495422661304474, "rewards/margins": 0.02925129421055317, "rewards/rejected": -0.2787935733795166, "step": 251 }, { "epoch": 0.6899383983572895, "grad_norm": 3.2668511867523193, "learning_rate": 9.654794520547945e-07, "log_odds_chosen": -0.3671860098838806, "log_odds_ratio": -1.0403599739074707, "logits/chosen": -0.9303092360496521, "logits/rejected": -0.9801923036575317, "logps/chosen": -2.6777195930480957, "logps/rejected": -2.2845330238342285, "loss": 2.2176, "nll_loss": 2.1135189533233643, "rewards/accuracies": 0.375, "rewards/chosen": -0.26777198910713196, "rewards/margins": -0.039318691939115524, "rewards/rejected": -0.22845329344272614, "step": 252 }, { "epoch": 0.6926762491444216, "grad_norm": 3.961451768875122, "learning_rate": 9.653424657534247e-07, "log_odds_chosen": -0.154715433716774, "log_odds_ratio": -1.0239768028259277, "logits/chosen": -1.0796679258346558, "logits/rejected": -1.1337047815322876, "logps/chosen": -3.1148269176483154, "logps/rejected": -2.890984535217285, "loss": 2.3452, "nll_loss": 2.2428231239318848, "rewards/accuracies": 0.625, "rewards/chosen": -0.311482697725296, "rewards/margins": -0.022384248673915863, "rewards/rejected": -0.28909844160079956, "step": 253 }, { "epoch": 0.6954140999315537, "grad_norm": 4.3832316398620605, "learning_rate": 9.652054794520549e-07, "log_odds_chosen": -0.8107893466949463, "log_odds_ratio": -1.7644679546356201, "logits/chosen": -0.9707953333854675, "logits/rejected": -0.9189150929450989, "logps/chosen": -4.161672592163086, "logps/rejected": -3.30440092086792, "loss": 2.5758, "nll_loss": 2.3993871212005615, "rewards/accuracies": 0.25, "rewards/chosen": -0.4161672592163086, "rewards/margins": -0.08572715520858765, "rewards/rejected": -0.33044010400772095, "step": 254 }, { "epoch": 0.6981519507186859, "grad_norm": 3.7314765453338623, "learning_rate": 9.650684931506849e-07, "log_odds_chosen": -0.25010353326797485, "log_odds_ratio": -0.9939874410629272, "logits/chosen": -1.1048487424850464, "logits/rejected": -1.1313409805297852, "logps/chosen": -3.090998888015747, "logps/rejected": -2.8056259155273438, "loss": 2.4332, "nll_loss": 2.333775520324707, "rewards/accuracies": 0.625, "rewards/chosen": -0.3090999126434326, "rewards/margins": -0.028537316247820854, "rewards/rejected": -0.2805625796318054, "step": 255 }, { "epoch": 0.7008898015058179, "grad_norm": 4.024218559265137, "learning_rate": 9.64931506849315e-07, "log_odds_chosen": -0.4550962746143341, "log_odds_ratio": -1.0956734418869019, "logits/chosen": -1.0343581438064575, "logits/rejected": -0.9991025924682617, "logps/chosen": -3.3945631980895996, "logps/rejected": -2.9179649353027344, "loss": 2.4562, "nll_loss": 2.346625328063965, "rewards/accuracies": 0.375, "rewards/chosen": -0.33945631980895996, "rewards/margins": -0.047659821808338165, "rewards/rejected": -0.2917965054512024, "step": 256 }, { "epoch": 0.70362765229295, "grad_norm": 3.2457704544067383, "learning_rate": 9.647945205479453e-07, "log_odds_chosen": -0.18340270221233368, "log_odds_ratio": -0.8407537937164307, "logits/chosen": -0.9943108558654785, "logits/rejected": -1.0051279067993164, "logps/chosen": -2.365363597869873, "logps/rejected": -2.1673166751861572, "loss": 2.1981, "nll_loss": 2.114036798477173, "rewards/accuracies": 0.625, "rewards/chosen": -0.2365363985300064, "rewards/margins": -0.01980472356081009, "rewards/rejected": -0.21673166751861572, "step": 257 }, { "epoch": 0.7063655030800822, "grad_norm": 3.181410074234009, "learning_rate": 9.646575342465753e-07, "log_odds_chosen": 0.10641276836395264, "log_odds_ratio": -0.9758596420288086, "logits/chosen": -0.8322647213935852, "logits/rejected": -0.8911882638931274, "logps/chosen": -2.2567152976989746, "logps/rejected": -2.3125860691070557, "loss": 2.1264, "nll_loss": 2.0287904739379883, "rewards/accuracies": 0.625, "rewards/chosen": -0.22567151486873627, "rewards/margins": 0.005587078630924225, "rewards/rejected": -0.2312586009502411, "step": 258 }, { "epoch": 0.7091033538672142, "grad_norm": 4.137002468109131, "learning_rate": 9.645205479452055e-07, "log_odds_chosen": -1.0977280139923096, "log_odds_ratio": -1.4990580081939697, "logits/chosen": -1.020631194114685, "logits/rejected": -0.9796189069747925, "logps/chosen": -3.972083568572998, "logps/rejected": -2.905876636505127, "loss": 2.5568, "nll_loss": 2.406928062438965, "rewards/accuracies": 0.25, "rewards/chosen": -0.3972083628177643, "rewards/margins": -0.1066206693649292, "rewards/rejected": -0.2905876934528351, "step": 259 }, { "epoch": 0.7118412046543463, "grad_norm": 3.583864212036133, "learning_rate": 9.643835616438357e-07, "log_odds_chosen": 0.45731592178344727, "log_odds_ratio": -0.5857783555984497, "logits/chosen": -0.9042763710021973, "logits/rejected": -0.8922066688537598, "logps/chosen": -2.1629159450531006, "logps/rejected": -2.5316455364227295, "loss": 2.2413, "nll_loss": 2.1827621459960938, "rewards/accuracies": 0.75, "rewards/chosen": -0.216291606426239, "rewards/margins": 0.03687293827533722, "rewards/rejected": -0.25316452980041504, "step": 260 }, { "epoch": 0.7145790554414785, "grad_norm": 4.157106876373291, "learning_rate": 9.642465753424657e-07, "log_odds_chosen": -0.9863892197608948, "log_odds_ratio": -1.6074934005737305, "logits/chosen": -1.085724115371704, "logits/rejected": -1.0686709880828857, "logps/chosen": -3.963322162628174, "logps/rejected": -2.9802582263946533, "loss": 2.5248, "nll_loss": 2.3640403747558594, "rewards/accuracies": 0.5, "rewards/chosen": -0.3963322043418884, "rewards/margins": -0.09830638766288757, "rewards/rejected": -0.29802581667900085, "step": 261 }, { "epoch": 0.7173169062286106, "grad_norm": 3.695345401763916, "learning_rate": 9.641095890410959e-07, "log_odds_chosen": -1.0082416534423828, "log_odds_ratio": -1.4876759052276611, "logits/chosen": -0.9699403047561646, "logits/rejected": -0.986160397529602, "logps/chosen": -3.448967218399048, "logps/rejected": -2.4396114349365234, "loss": 2.3212, "nll_loss": 2.172393798828125, "rewards/accuracies": 0.375, "rewards/chosen": -0.34489673376083374, "rewards/margins": -0.10093558579683304, "rewards/rejected": -0.2439611256122589, "step": 262 }, { "epoch": 0.7200547570157426, "grad_norm": 3.2534730434417725, "learning_rate": 9.63972602739726e-07, "log_odds_chosen": -0.5699167251586914, "log_odds_ratio": -1.2673046588897705, "logits/chosen": -0.9330844283103943, "logits/rejected": -1.039818286895752, "logps/chosen": -2.8956832885742188, "logps/rejected": -2.322484016418457, "loss": 2.3245, "nll_loss": 2.1977789402008057, "rewards/accuracies": 0.625, "rewards/chosen": -0.28956836462020874, "rewards/margins": -0.057319946587085724, "rewards/rejected": -0.23224839568138123, "step": 263 }, { "epoch": 0.7227926078028748, "grad_norm": 4.479331970214844, "learning_rate": 9.63835616438356e-07, "log_odds_chosen": -0.9068465232849121, "log_odds_ratio": -1.3410214185714722, "logits/chosen": -0.9647909998893738, "logits/rejected": -0.9307161569595337, "logps/chosen": -3.9227781295776367, "logps/rejected": -3.027977466583252, "loss": 2.4978, "nll_loss": 2.363692283630371, "rewards/accuracies": 0.25, "rewards/chosen": -0.3922778069972992, "rewards/margins": -0.08948005735874176, "rewards/rejected": -0.30279776453971863, "step": 264 }, { "epoch": 0.7255304585900069, "grad_norm": 4.578000545501709, "learning_rate": 9.636986301369863e-07, "log_odds_chosen": -1.7284036874771118, "log_odds_ratio": -2.120065450668335, "logits/chosen": -1.017161250114441, "logits/rejected": -1.013040542602539, "logps/chosen": -4.7739667892456055, "logps/rejected": -3.0563459396362305, "loss": 2.6407, "nll_loss": 2.4287309646606445, "rewards/accuracies": 0.25, "rewards/chosen": -0.477396696805954, "rewards/margins": -0.17176209390163422, "rewards/rejected": -0.30563461780548096, "step": 265 }, { "epoch": 0.7282683093771389, "grad_norm": 3.975260019302368, "learning_rate": 9.635616438356165e-07, "log_odds_chosen": 0.4134540259838104, "log_odds_ratio": -0.6659979820251465, "logits/chosen": -0.930891752243042, "logits/rejected": -0.9127289056777954, "logps/chosen": -2.863473892211914, "logps/rejected": -3.2181553840637207, "loss": 2.3698, "nll_loss": 2.3032424449920654, "rewards/accuracies": 0.5, "rewards/chosen": -0.2863473892211914, "rewards/margins": 0.035468149930238724, "rewards/rejected": -0.321815550327301, "step": 266 }, { "epoch": 0.731006160164271, "grad_norm": 3.4487216472625732, "learning_rate": 9.634246575342465e-07, "log_odds_chosen": 0.07437797635793686, "log_odds_ratio": -0.7408564686775208, "logits/chosen": -1.0537686347961426, "logits/rejected": -1.0807113647460938, "logps/chosen": -2.4779090881347656, "logps/rejected": -2.544412612915039, "loss": 2.3478, "nll_loss": 2.2736892700195312, "rewards/accuracies": 0.5, "rewards/chosen": -0.24779090285301208, "rewards/margins": 0.006650369614362717, "rewards/rejected": -0.2544412612915039, "step": 267 }, { "epoch": 0.7337440109514032, "grad_norm": 4.348547458648682, "learning_rate": 9.632876712328767e-07, "log_odds_chosen": -1.0221154689788818, "log_odds_ratio": -1.5144933462142944, "logits/chosen": -1.0336339473724365, "logits/rejected": -1.0021426677703857, "logps/chosen": -3.985659599304199, "logps/rejected": -2.9494199752807617, "loss": 2.496, "nll_loss": 2.3445944786071777, "rewards/accuracies": 0.25, "rewards/chosen": -0.39856594800949097, "rewards/margins": -0.10362398624420166, "rewards/rejected": -0.2949419915676117, "step": 268 }, { "epoch": 0.7364818617385352, "grad_norm": 3.753183364868164, "learning_rate": 9.631506849315067e-07, "log_odds_chosen": 0.42555737495422363, "log_odds_ratio": -0.8348201513290405, "logits/chosen": -1.085829496383667, "logits/rejected": -1.1223931312561035, "logps/chosen": -2.9607667922973633, "logps/rejected": -3.324558734893799, "loss": 2.3093, "nll_loss": 2.225847005844116, "rewards/accuracies": 0.5, "rewards/chosen": -0.2960767149925232, "rewards/margins": 0.036379165947437286, "rewards/rejected": -0.33245590329170227, "step": 269 }, { "epoch": 0.7392197125256673, "grad_norm": 3.344398260116577, "learning_rate": 9.630136986301369e-07, "log_odds_chosen": 0.3498121201992035, "log_odds_ratio": -0.6309839487075806, "logits/chosen": -0.8306611776351929, "logits/rejected": -0.84551602602005, "logps/chosen": -2.2744736671447754, "logps/rejected": -2.559316635131836, "loss": 2.207, "nll_loss": 2.1439168453216553, "rewards/accuracies": 0.75, "rewards/chosen": -0.22744734585285187, "rewards/margins": 0.028484296053647995, "rewards/rejected": -0.25593164563179016, "step": 270 }, { "epoch": 0.7419575633127995, "grad_norm": 3.7676291465759277, "learning_rate": 9.62876712328767e-07, "log_odds_chosen": -0.13998891413211823, "log_odds_ratio": -0.9128775596618652, "logits/chosen": -1.0142498016357422, "logits/rejected": -1.0211384296417236, "logps/chosen": -2.8852920532226562, "logps/rejected": -2.712676525115967, "loss": 2.3449, "nll_loss": 2.253614664077759, "rewards/accuracies": 0.375, "rewards/chosen": -0.2885292172431946, "rewards/margins": -0.017261585220694542, "rewards/rejected": -0.2712676525115967, "step": 271 }, { "epoch": 0.7446954140999316, "grad_norm": 3.7035040855407715, "learning_rate": 9.62739726027397e-07, "log_odds_chosen": 0.18126508593559265, "log_odds_ratio": -0.9903463125228882, "logits/chosen": -0.9550225734710693, "logits/rejected": -0.9787377119064331, "logps/chosen": -3.202779769897461, "logps/rejected": -3.373851776123047, "loss": 2.2724, "nll_loss": 2.1733667850494385, "rewards/accuracies": 0.25, "rewards/chosen": -0.32027798891067505, "rewards/margins": 0.01710718870162964, "rewards/rejected": -0.3373851776123047, "step": 272 }, { "epoch": 0.7474332648870636, "grad_norm": 3.082836627960205, "learning_rate": 9.626027397260273e-07, "log_odds_chosen": 0.30858296155929565, "log_odds_ratio": -0.6647326350212097, "logits/chosen": -0.9235747456550598, "logits/rejected": -1.0406126976013184, "logps/chosen": -2.5638933181762695, "logps/rejected": -2.837824583053589, "loss": 2.1689, "nll_loss": 2.102450370788574, "rewards/accuracies": 0.625, "rewards/chosen": -0.2563893496990204, "rewards/margins": 0.027393102645874023, "rewards/rejected": -0.2837824523448944, "step": 273 }, { "epoch": 0.7501711156741958, "grad_norm": 4.123567581176758, "learning_rate": 9.624657534246575e-07, "log_odds_chosen": -0.4687694013118744, "log_odds_ratio": -1.008711814880371, "logits/chosen": -0.9492815732955933, "logits/rejected": -0.9162305593490601, "logps/chosen": -3.2926206588745117, "logps/rejected": -2.8290178775787354, "loss": 2.407, "nll_loss": 2.3060946464538574, "rewards/accuracies": 0.25, "rewards/chosen": -0.3292620778083801, "rewards/margins": -0.04636029154062271, "rewards/rejected": -0.282901793718338, "step": 274 }, { "epoch": 0.7529089664613279, "grad_norm": 3.7035861015319824, "learning_rate": 9.623287671232875e-07, "log_odds_chosen": 0.5293578505516052, "log_odds_ratio": -1.384310007095337, "logits/chosen": -0.9541219472885132, "logits/rejected": -1.0947502851486206, "logps/chosen": -3.3561248779296875, "logps/rejected": -3.797318458557129, "loss": 2.2931, "nll_loss": 2.1546719074249268, "rewards/accuracies": 0.5, "rewards/chosen": -0.33561253547668457, "rewards/margins": 0.04411932826042175, "rewards/rejected": -0.37973183393478394, "step": 275 }, { "epoch": 0.75564681724846, "grad_norm": 4.028528213500977, "learning_rate": 9.621917808219177e-07, "log_odds_chosen": -0.18759357929229736, "log_odds_ratio": -1.0675842761993408, "logits/chosen": -0.8463407754898071, "logits/rejected": -0.8854281902313232, "logps/chosen": -3.1830787658691406, "logps/rejected": -2.9895763397216797, "loss": 2.3456, "nll_loss": 2.238804817199707, "rewards/accuracies": 0.5, "rewards/chosen": -0.3183078467845917, "rewards/margins": -0.0193502064794302, "rewards/rejected": -0.2989576458930969, "step": 276 }, { "epoch": 0.758384668035592, "grad_norm": 4.196558952331543, "learning_rate": 9.62054794520548e-07, "log_odds_chosen": -0.5785447955131531, "log_odds_ratio": -1.2900818586349487, "logits/chosen": -1.0493900775909424, "logits/rejected": -1.0551508665084839, "logps/chosen": -3.912980556488037, "logps/rejected": -3.3437535762786865, "loss": 2.4526, "nll_loss": 2.3236188888549805, "rewards/accuracies": 0.375, "rewards/chosen": -0.3912980556488037, "rewards/margins": -0.05692268908023834, "rewards/rejected": -0.3343753516674042, "step": 277 }, { "epoch": 0.7611225188227242, "grad_norm": 3.8787364959716797, "learning_rate": 9.61917808219178e-07, "log_odds_chosen": 0.3651062548160553, "log_odds_ratio": -0.7989082932472229, "logits/chosen": -0.9308514595031738, "logits/rejected": -1.0169018507003784, "logps/chosen": -2.5856573581695557, "logps/rejected": -2.931920051574707, "loss": 2.3276, "nll_loss": 2.2477543354034424, "rewards/accuracies": 0.375, "rewards/chosen": -0.2585657238960266, "rewards/margins": 0.034626297652721405, "rewards/rejected": -0.2931920289993286, "step": 278 }, { "epoch": 0.7638603696098563, "grad_norm": 3.710371255874634, "learning_rate": 9.61780821917808e-07, "log_odds_chosen": -1.316992163658142, "log_odds_ratio": -1.76692533493042, "logits/chosen": -1.0448858737945557, "logits/rejected": -1.012679100036621, "logps/chosen": -3.9452011585235596, "logps/rejected": -2.635772705078125, "loss": 2.4225, "nll_loss": 2.245845079421997, "rewards/accuracies": 0.375, "rewards/chosen": -0.3945201635360718, "rewards/margins": -0.13094285130500793, "rewards/rejected": -0.26357728242874146, "step": 279 }, { "epoch": 0.7665982203969883, "grad_norm": 4.426299095153809, "learning_rate": 9.616438356164383e-07, "log_odds_chosen": -0.38395577669143677, "log_odds_ratio": -1.0013072490692139, "logits/chosen": -1.02006995677948, "logits/rejected": -0.9758345484733582, "logps/chosen": -4.464716911315918, "logps/rejected": -4.0957794189453125, "loss": 2.5232, "nll_loss": 2.4231019020080566, "rewards/accuracies": 0.5, "rewards/chosen": -0.4464716911315918, "rewards/margins": -0.03689375892281532, "rewards/rejected": -0.4095779359340668, "step": 280 }, { "epoch": 0.7693360711841205, "grad_norm": 2.8686375617980957, "learning_rate": 9.615068493150685e-07, "log_odds_chosen": 0.28188157081604004, "log_odds_ratio": -0.6527458429336548, "logits/chosen": -0.9116687774658203, "logits/rejected": -0.9141849875450134, "logps/chosen": -1.9053494930267334, "logps/rejected": -2.1530933380126953, "loss": 2.1085, "nll_loss": 2.043271064758301, "rewards/accuracies": 0.75, "rewards/chosen": -0.19053496420383453, "rewards/margins": 0.024774374440312386, "rewards/rejected": -0.21530933678150177, "step": 281 }, { "epoch": 0.7720739219712526, "grad_norm": 2.9201526641845703, "learning_rate": 9.613698630136985e-07, "log_odds_chosen": 0.3310529887676239, "log_odds_ratio": -0.7092412114143372, "logits/chosen": -0.7297369241714478, "logits/rejected": -0.8032384514808655, "logps/chosen": -2.0470876693725586, "logps/rejected": -2.3104166984558105, "loss": 2.0892, "nll_loss": 2.018325090408325, "rewards/accuracies": 0.875, "rewards/chosen": -0.2047087550163269, "rewards/margins": 0.02633291482925415, "rewards/rejected": -0.23104166984558105, "step": 282 }, { "epoch": 0.7748117727583846, "grad_norm": 3.1207149028778076, "learning_rate": 9.612328767123287e-07, "log_odds_chosen": 0.24756112694740295, "log_odds_ratio": -0.7290762066841125, "logits/chosen": -0.9412771463394165, "logits/rejected": -0.9941659569740295, "logps/chosen": -3.0447475910186768, "logps/rejected": -3.223560333251953, "loss": 2.2303, "nll_loss": 2.157381534576416, "rewards/accuracies": 0.625, "rewards/chosen": -0.304474800825119, "rewards/margins": 0.017881257459521294, "rewards/rejected": -0.32235604524612427, "step": 283 }, { "epoch": 0.7775496235455168, "grad_norm": 3.7015933990478516, "learning_rate": 9.61095890410959e-07, "log_odds_chosen": -0.0046253502368927, "log_odds_ratio": -0.7217745780944824, "logits/chosen": -1.0853838920593262, "logits/rejected": -1.0766994953155518, "logps/chosen": -2.5785365104675293, "logps/rejected": -2.553225040435791, "loss": 2.2649, "nll_loss": 2.1927497386932373, "rewards/accuracies": 0.5, "rewards/chosen": -0.2578536868095398, "rewards/margins": -0.002531144767999649, "rewards/rejected": -0.25532251596450806, "step": 284 }, { "epoch": 0.7802874743326489, "grad_norm": 3.181443929672241, "learning_rate": 9.609589041095891e-07, "log_odds_chosen": 0.7354199290275574, "log_odds_ratio": -1.077467679977417, "logits/chosen": -0.9594424366950989, "logits/rejected": -1.0323700904846191, "logps/chosen": -2.8857624530792236, "logps/rejected": -3.5799038410186768, "loss": 2.217, "nll_loss": 2.1092841625213623, "rewards/accuracies": 0.625, "rewards/chosen": -0.28857624530792236, "rewards/margins": 0.06941413879394531, "rewards/rejected": -0.3579903841018677, "step": 285 }, { "epoch": 0.783025325119781, "grad_norm": 3.6000349521636963, "learning_rate": 9.608219178082191e-07, "log_odds_chosen": -0.6756335496902466, "log_odds_ratio": -1.22076416015625, "logits/chosen": -0.9795129299163818, "logits/rejected": -0.9505142569541931, "logps/chosen": -3.0686287879943848, "logps/rejected": -2.4016454219818115, "loss": 2.3101, "nll_loss": 2.1880111694335938, "rewards/accuracies": 0.375, "rewards/chosen": -0.30686289072036743, "rewards/margins": -0.0666983500123024, "rewards/rejected": -0.24016454815864563, "step": 286 }, { "epoch": 0.785763175906913, "grad_norm": 3.7574691772460938, "learning_rate": 9.606849315068493e-07, "log_odds_chosen": -1.1972659826278687, "log_odds_ratio": -1.7023842334747314, "logits/chosen": -1.101947546005249, "logits/rejected": -1.1032495498657227, "logps/chosen": -3.644113063812256, "logps/rejected": -2.4698405265808105, "loss": 2.3829, "nll_loss": 2.212641954421997, "rewards/accuracies": 0.375, "rewards/chosen": -0.36441129446029663, "rewards/margins": -0.11742725223302841, "rewards/rejected": -0.24698404967784882, "step": 287 }, { "epoch": 0.7885010266940452, "grad_norm": 3.427971363067627, "learning_rate": 9.605479452054795e-07, "log_odds_chosen": -0.8823611736297607, "log_odds_ratio": -1.4321930408477783, "logits/chosen": -1.0173064470291138, "logits/rejected": -0.9886384010314941, "logps/chosen": -3.3173434734344482, "logps/rejected": -2.460110664367676, "loss": 2.3637, "nll_loss": 2.2204508781433105, "rewards/accuracies": 0.375, "rewards/chosen": -0.3317343294620514, "rewards/margins": -0.08572325110435486, "rewards/rejected": -0.24601107835769653, "step": 288 }, { "epoch": 0.7912388774811773, "grad_norm": 3.139237642288208, "learning_rate": 9.604109589041095e-07, "log_odds_chosen": 0.049748923629522324, "log_odds_ratio": -0.7266517281532288, "logits/chosen": -0.8518579602241516, "logits/rejected": -0.9250311851501465, "logps/chosen": -2.222033977508545, "logps/rejected": -2.233980178833008, "loss": 2.212, "nll_loss": 2.139295816421509, "rewards/accuracies": 0.5, "rewards/chosen": -0.22220338881015778, "rewards/margins": 0.0011946288868784904, "rewards/rejected": -0.22339802980422974, "step": 289 }, { "epoch": 0.7939767282683093, "grad_norm": 4.441126346588135, "learning_rate": 9.602739726027397e-07, "log_odds_chosen": -0.7054169178009033, "log_odds_ratio": -1.1510062217712402, "logits/chosen": -0.9856531023979187, "logits/rejected": -0.9490070343017578, "logps/chosen": -3.5596885681152344, "logps/rejected": -2.8877756595611572, "loss": 2.4394, "nll_loss": 2.3242592811584473, "rewards/accuracies": 0.125, "rewards/chosen": -0.3559688925743103, "rewards/margins": -0.06719130277633667, "rewards/rejected": -0.28877758979797363, "step": 290 }, { "epoch": 0.7967145790554415, "grad_norm": 3.5319037437438965, "learning_rate": 9.6013698630137e-07, "log_odds_chosen": 0.4876423478126526, "log_odds_ratio": -0.6846709251403809, "logits/chosen": -0.9624636769294739, "logits/rejected": -0.9597715139389038, "logps/chosen": -2.9232754707336426, "logps/rejected": -3.378974199295044, "loss": 2.1672, "nll_loss": 2.09877610206604, "rewards/accuracies": 0.5, "rewards/chosen": -0.29232755303382874, "rewards/margins": 0.045569878071546555, "rewards/rejected": -0.3378974497318268, "step": 291 }, { "epoch": 0.7994524298425736, "grad_norm": 4.131087779998779, "learning_rate": 9.6e-07, "log_odds_chosen": -1.3263615369796753, "log_odds_ratio": -1.6940772533416748, "logits/chosen": -0.9587786793708801, "logits/rejected": -0.9669023752212524, "logps/chosen": -3.785202980041504, "logps/rejected": -2.484938144683838, "loss": 2.4102, "nll_loss": 2.2407562732696533, "rewards/accuracies": 0.25, "rewards/chosen": -0.37852028012275696, "rewards/margins": -0.1300264596939087, "rewards/rejected": -0.24849382042884827, "step": 292 }, { "epoch": 0.8021902806297057, "grad_norm": 3.793515682220459, "learning_rate": 9.598630136986301e-07, "log_odds_chosen": -0.43749359250068665, "log_odds_ratio": -1.162844181060791, "logits/chosen": -1.0858557224273682, "logits/rejected": -1.089351773262024, "logps/chosen": -3.3782267570495605, "logps/rejected": -2.951361894607544, "loss": 2.3145, "nll_loss": 2.198168992996216, "rewards/accuracies": 0.5, "rewards/chosen": -0.33782267570495605, "rewards/margins": -0.04268646612763405, "rewards/rejected": -0.2951362133026123, "step": 293 }, { "epoch": 0.8049281314168378, "grad_norm": 2.960265636444092, "learning_rate": 9.597260273972603e-07, "log_odds_chosen": 0.5263592600822449, "log_odds_ratio": -0.6754665970802307, "logits/chosen": -0.8089703917503357, "logits/rejected": -0.9280896186828613, "logps/chosen": -2.0640981197357178, "logps/rejected": -2.529508352279663, "loss": 2.1433, "nll_loss": 2.0757622718811035, "rewards/accuracies": 0.5, "rewards/chosen": -0.20640981197357178, "rewards/margins": 0.04654102399945259, "rewards/rejected": -0.25295084714889526, "step": 294 }, { "epoch": 0.8076659822039699, "grad_norm": 3.6663825511932373, "learning_rate": 9.595890410958903e-07, "log_odds_chosen": 0.4067961573600769, "log_odds_ratio": -0.6063193678855896, "logits/chosen": -1.0076802968978882, "logits/rejected": -1.0994625091552734, "logps/chosen": -2.5521323680877686, "logps/rejected": -2.908958911895752, "loss": 2.1759, "nll_loss": 2.1152825355529785, "rewards/accuracies": 0.5, "rewards/chosen": -0.2552132308483124, "rewards/margins": 0.03568263351917267, "rewards/rejected": -0.29089587926864624, "step": 295 }, { "epoch": 0.810403832991102, "grad_norm": 3.410095453262329, "learning_rate": 9.594520547945205e-07, "log_odds_chosen": -0.2807556092739105, "log_odds_ratio": -1.0626716613769531, "logits/chosen": -0.8914830088615417, "logits/rejected": -0.9218307733535767, "logps/chosen": -2.742384195327759, "logps/rejected": -2.42262601852417, "loss": 2.225, "nll_loss": 2.1187424659729004, "rewards/accuracies": 0.625, "rewards/chosen": -0.2742384076118469, "rewards/margins": -0.03197582811117172, "rewards/rejected": -0.2422625869512558, "step": 296 }, { "epoch": 0.813141683778234, "grad_norm": 4.425623416900635, "learning_rate": 9.593150684931507e-07, "log_odds_chosen": -1.402332067489624, "log_odds_ratio": -1.847739577293396, "logits/chosen": -1.0335044860839844, "logits/rejected": -0.992799699306488, "logps/chosen": -4.087936878204346, "logps/rejected": -2.7240560054779053, "loss": 2.4705, "nll_loss": 2.28568434715271, "rewards/accuracies": 0.25, "rewards/chosen": -0.40879371762275696, "rewards/margins": -0.1363881230354309, "rewards/rejected": -0.27240562438964844, "step": 297 }, { "epoch": 0.8158795345653662, "grad_norm": 3.9299166202545166, "learning_rate": 9.591780821917807e-07, "log_odds_chosen": -0.7520735859870911, "log_odds_ratio": -1.4010555744171143, "logits/chosen": -1.0520344972610474, "logits/rejected": -1.0033001899719238, "logps/chosen": -3.531013250350952, "logps/rejected": -2.769836902618408, "loss": 2.3876, "nll_loss": 2.2475199699401855, "rewards/accuracies": 0.25, "rewards/chosen": -0.35310131311416626, "rewards/margins": -0.07611764967441559, "rewards/rejected": -0.27698367834091187, "step": 298 }, { "epoch": 0.8186173853524983, "grad_norm": 3.539454936981201, "learning_rate": 9.59041095890411e-07, "log_odds_chosen": -0.4661966562271118, "log_odds_ratio": -1.1393344402313232, "logits/chosen": -1.026100993156433, "logits/rejected": -1.0335052013397217, "logps/chosen": -3.011216640472412, "logps/rejected": -2.533083915710449, "loss": 2.2816, "nll_loss": 2.167693614959717, "rewards/accuracies": 0.625, "rewards/chosen": -0.30112165212631226, "rewards/margins": -0.0478132888674736, "rewards/rejected": -0.25330838561058044, "step": 299 }, { "epoch": 0.8213552361396304, "grad_norm": 3.561617851257324, "learning_rate": 9.58904109589041e-07, "log_odds_chosen": 0.5750443339347839, "log_odds_ratio": -0.5252573490142822, "logits/chosen": -0.9718005061149597, "logits/rejected": -1.057446002960205, "logps/chosen": -2.663809299468994, "logps/rejected": -3.1822378635406494, "loss": 2.2485, "nll_loss": 2.1960222721099854, "rewards/accuracies": 0.75, "rewards/chosen": -0.2663809657096863, "rewards/margins": 0.05184285715222359, "rewards/rejected": -0.31822383403778076, "step": 300 }, { "epoch": 0.8240930869267625, "grad_norm": 3.2859184741973877, "learning_rate": 9.587671232876711e-07, "log_odds_chosen": -0.5567988157272339, "log_odds_ratio": -1.118471622467041, "logits/chosen": -0.953074038028717, "logits/rejected": -0.9500552415847778, "logps/chosen": -2.501701593399048, "logps/rejected": -1.962912678718567, "loss": 2.2198, "nll_loss": 2.107970714569092, "rewards/accuracies": 0.375, "rewards/chosen": -0.25017014145851135, "rewards/margins": -0.05387888103723526, "rewards/rejected": -0.1962912678718567, "step": 301 }, { "epoch": 0.8268309377138946, "grad_norm": 3.8508358001708984, "learning_rate": 9.586301369863013e-07, "log_odds_chosen": -0.6992652416229248, "log_odds_ratio": -1.3820264339447021, "logits/chosen": -0.8890694379806519, "logits/rejected": -0.8842575550079346, "logps/chosen": -3.4894967079162598, "logps/rejected": -2.773524045944214, "loss": 2.3223, "nll_loss": 2.184053659439087, "rewards/accuracies": 0.5, "rewards/chosen": -0.348949670791626, "rewards/margins": -0.07159726321697235, "rewards/rejected": -0.2773524224758148, "step": 302 }, { "epoch": 0.8295687885010267, "grad_norm": 3.84757137298584, "learning_rate": 9.584931506849313e-07, "log_odds_chosen": 0.07792197167873383, "log_odds_ratio": -0.8842697143554688, "logits/chosen": -0.9457278847694397, "logits/rejected": -0.9334440231323242, "logps/chosen": -2.6797738075256348, "logps/rejected": -2.758392333984375, "loss": 2.3536, "nll_loss": 2.265132427215576, "rewards/accuracies": 0.625, "rewards/chosen": -0.26797735691070557, "rewards/margins": 0.007861878722906113, "rewards/rejected": -0.27583926916122437, "step": 303 }, { "epoch": 0.8323066392881588, "grad_norm": 4.583820819854736, "learning_rate": 9.583561643835615e-07, "log_odds_chosen": -0.8405465483665466, "log_odds_ratio": -1.3612909317016602, "logits/chosen": -0.9688655138015747, "logits/rejected": -0.8972649574279785, "logps/chosen": -4.118002414703369, "logps/rejected": -3.2652461528778076, "loss": 2.4081, "nll_loss": 2.2719666957855225, "rewards/accuracies": 0.375, "rewards/chosen": -0.41180023550987244, "rewards/margins": -0.08527559787034988, "rewards/rejected": -0.32652464509010315, "step": 304 }, { "epoch": 0.8350444900752909, "grad_norm": 3.720000743865967, "learning_rate": 9.582191780821917e-07, "log_odds_chosen": -0.29219579696655273, "log_odds_ratio": -0.921619176864624, "logits/chosen": -0.9671617150306702, "logits/rejected": -0.9458392858505249, "logps/chosen": -2.8555307388305664, "logps/rejected": -2.5663580894470215, "loss": 2.2233, "nll_loss": 2.1311120986938477, "rewards/accuracies": 0.375, "rewards/chosen": -0.28555306792259216, "rewards/margins": -0.028917260468006134, "rewards/rejected": -0.2566358149051666, "step": 305 }, { "epoch": 0.837782340862423, "grad_norm": 2.9780025482177734, "learning_rate": 9.58082191780822e-07, "log_odds_chosen": -0.33879321813583374, "log_odds_ratio": -1.0008214712142944, "logits/chosen": -0.8059914112091064, "logits/rejected": -0.9274457097053528, "logps/chosen": -2.6724140644073486, "logps/rejected": -2.3183484077453613, "loss": 2.155, "nll_loss": 2.0548958778381348, "rewards/accuracies": 0.375, "rewards/chosen": -0.2672414183616638, "rewards/margins": -0.03540657088160515, "rewards/rejected": -0.23183484375476837, "step": 306 }, { "epoch": 0.840520191649555, "grad_norm": 3.170464277267456, "learning_rate": 9.57945205479452e-07, "log_odds_chosen": 0.7348558902740479, "log_odds_ratio": -0.40952268242836, "logits/chosen": -0.9305769801139832, "logits/rejected": -0.9949920773506165, "logps/chosen": -1.816580057144165, "logps/rejected": -2.42529296875, "loss": 2.0359, "nll_loss": 1.994899034500122, "rewards/accuracies": 1.0, "rewards/chosen": -0.18165799975395203, "rewards/margins": 0.06087128445506096, "rewards/rejected": -0.24252928793430328, "step": 307 }, { "epoch": 0.8432580424366872, "grad_norm": 3.445873737335205, "learning_rate": 9.578082191780821e-07, "log_odds_chosen": -0.27488625049591064, "log_odds_ratio": -1.0279541015625, "logits/chosen": -1.0081151723861694, "logits/rejected": -1.0448403358459473, "logps/chosen": -2.6412456035614014, "logps/rejected": -2.3586161136627197, "loss": 2.2393, "nll_loss": 2.1365041732788086, "rewards/accuracies": 0.375, "rewards/chosen": -0.2641245722770691, "rewards/margins": -0.028262948617339134, "rewards/rejected": -0.2358616143465042, "step": 308 }, { "epoch": 0.8459958932238193, "grad_norm": 3.321042776107788, "learning_rate": 9.576712328767123e-07, "log_odds_chosen": -0.01884445548057556, "log_odds_ratio": -0.8900073170661926, "logits/chosen": -0.9436532258987427, "logits/rejected": -1.0054177045822144, "logps/chosen": -2.543046712875366, "logps/rejected": -2.515146017074585, "loss": 2.18, "nll_loss": 2.091038227081299, "rewards/accuracies": 0.625, "rewards/chosen": -0.2543046772480011, "rewards/margins": -0.0027900710701942444, "rewards/rejected": -0.25151461362838745, "step": 309 }, { "epoch": 0.8487337440109514, "grad_norm": 3.227346897125244, "learning_rate": 9.575342465753423e-07, "log_odds_chosen": 0.6035524606704712, "log_odds_ratio": -0.7897660136222839, "logits/chosen": -0.9782559275627136, "logits/rejected": -1.0687673091888428, "logps/chosen": -2.2759158611297607, "logps/rejected": -2.8059306144714355, "loss": 2.0938, "nll_loss": 2.0147862434387207, "rewards/accuracies": 0.625, "rewards/chosen": -0.22759157419204712, "rewards/margins": 0.05300147831439972, "rewards/rejected": -0.28059303760528564, "step": 310 }, { "epoch": 0.8514715947980835, "grad_norm": 3.199756383895874, "learning_rate": 9.573972602739725e-07, "log_odds_chosen": 1.4624707698822021, "log_odds_ratio": -0.48811647295951843, "logits/chosen": -1.0943324565887451, "logits/rejected": -1.161963939666748, "logps/chosen": -1.9855345487594604, "logps/rejected": -3.356844425201416, "loss": 2.1233, "nll_loss": 2.074444532394409, "rewards/accuracies": 0.75, "rewards/chosen": -0.19855347275733948, "rewards/margins": 0.1371309757232666, "rewards/rejected": -0.3356844186782837, "step": 311 }, { "epoch": 0.8542094455852156, "grad_norm": 3.80973744392395, "learning_rate": 9.572602739726027e-07, "log_odds_chosen": -0.38964128494262695, "log_odds_ratio": -1.1487500667572021, "logits/chosen": -0.9964510202407837, "logits/rejected": -1.0180668830871582, "logps/chosen": -3.3840579986572266, "logps/rejected": -3.008199691772461, "loss": 2.3479, "nll_loss": 2.233072519302368, "rewards/accuracies": 0.5, "rewards/chosen": -0.3384058177471161, "rewards/margins": -0.03758583217859268, "rewards/rejected": -0.3008199632167816, "step": 312 }, { "epoch": 0.8569472963723477, "grad_norm": 3.579058885574341, "learning_rate": 9.57123287671233e-07, "log_odds_chosen": -0.15905779600143433, "log_odds_ratio": -1.0028804540634155, "logits/chosen": -0.9854365587234497, "logits/rejected": -1.0206098556518555, "logps/chosen": -2.959238052368164, "logps/rejected": -2.7490968704223633, "loss": 2.1939, "nll_loss": 2.0936367511749268, "rewards/accuracies": 0.5, "rewards/chosen": -0.29592379927635193, "rewards/margins": -0.02101409062743187, "rewards/rejected": -0.27490970492362976, "step": 313 }, { "epoch": 0.8596851471594799, "grad_norm": 3.964571714401245, "learning_rate": 9.56986301369863e-07, "log_odds_chosen": -1.490729808807373, "log_odds_ratio": -1.8470160961151123, "logits/chosen": -1.0457119941711426, "logits/rejected": -0.9734127521514893, "logps/chosen": -3.5260157585144043, "logps/rejected": -2.0960183143615723, "loss": 2.3394, "nll_loss": 2.154705047607422, "rewards/accuracies": 0.25, "rewards/chosen": -0.3526015281677246, "rewards/margins": -0.14299969375133514, "rewards/rejected": -0.20960183441638947, "step": 314 }, { "epoch": 0.8624229979466119, "grad_norm": 3.8923089504241943, "learning_rate": 9.568493150684931e-07, "log_odds_chosen": -0.4591875672340393, "log_odds_ratio": -1.113945722579956, "logits/chosen": -0.9136810302734375, "logits/rejected": -0.8743624687194824, "logps/chosen": -3.0735349655151367, "logps/rejected": -2.6083478927612305, "loss": 2.2758, "nll_loss": 2.1644387245178223, "rewards/accuracies": 0.5, "rewards/chosen": -0.30735349655151367, "rewards/margins": -0.046518683433532715, "rewards/rejected": -0.26083481311798096, "step": 315 }, { "epoch": 0.865160848733744, "grad_norm": 3.824277400970459, "learning_rate": 9.567123287671234e-07, "log_odds_chosen": -0.3199617862701416, "log_odds_ratio": -1.0423378944396973, "logits/chosen": -0.9012932777404785, "logits/rejected": -0.9323334693908691, "logps/chosen": -2.991448163986206, "logps/rejected": -2.6664834022521973, "loss": 2.2577, "nll_loss": 2.153499126434326, "rewards/accuracies": 0.5, "rewards/chosen": -0.29914480447769165, "rewards/margins": -0.03249648958444595, "rewards/rejected": -0.2666483521461487, "step": 316 }, { "epoch": 0.8678986995208761, "grad_norm": 3.2076244354248047, "learning_rate": 9.565753424657533e-07, "log_odds_chosen": 0.11038336157798767, "log_odds_ratio": -0.7342426776885986, "logits/chosen": -0.7776603102684021, "logits/rejected": -0.8670386672019958, "logps/chosen": -2.125005006790161, "logps/rejected": -2.1872644424438477, "loss": 2.1051, "nll_loss": 2.0316905975341797, "rewards/accuracies": 0.625, "rewards/chosen": -0.21250051259994507, "rewards/margins": 0.00622590072453022, "rewards/rejected": -0.21872642636299133, "step": 317 }, { "epoch": 0.8706365503080082, "grad_norm": 3.4905331134796143, "learning_rate": 9.564383561643836e-07, "log_odds_chosen": -0.07092199474573135, "log_odds_ratio": -0.8912386894226074, "logits/chosen": -0.9437543153762817, "logits/rejected": -0.9661699533462524, "logps/chosen": -2.704413414001465, "logps/rejected": -2.581939935684204, "loss": 2.1828, "nll_loss": 2.0936481952667236, "rewards/accuracies": 0.375, "rewards/chosen": -0.27044135332107544, "rewards/margins": -0.012247327715158463, "rewards/rejected": -0.2581940293312073, "step": 318 }, { "epoch": 0.8733744010951403, "grad_norm": 3.5022809505462646, "learning_rate": 9.563013698630138e-07, "log_odds_chosen": -0.5813791155815125, "log_odds_ratio": -1.3637340068817139, "logits/chosen": -1.0065340995788574, "logits/rejected": -1.046445608139038, "logps/chosen": -2.9549179077148438, "logps/rejected": -2.328094720840454, "loss": 2.1797, "nll_loss": 2.043306350708008, "rewards/accuracies": 0.625, "rewards/chosen": -0.2954917848110199, "rewards/margins": -0.06268232315778732, "rewards/rejected": -0.23280948400497437, "step": 319 }, { "epoch": 0.8761122518822724, "grad_norm": 3.471041202545166, "learning_rate": 9.561643835616437e-07, "log_odds_chosen": -0.03320537507534027, "log_odds_ratio": -0.8940731883049011, "logits/chosen": -1.1526068449020386, "logits/rejected": -1.0885909795761108, "logps/chosen": -2.7477223873138428, "logps/rejected": -2.732865333557129, "loss": 2.2731, "nll_loss": 2.183678150177002, "rewards/accuracies": 0.375, "rewards/chosen": -0.2747722566127777, "rewards/margins": -0.001485716551542282, "rewards/rejected": -0.27328652143478394, "step": 320 }, { "epoch": 0.8788501026694046, "grad_norm": 4.670004844665527, "learning_rate": 9.56027397260274e-07, "log_odds_chosen": -1.0779343843460083, "log_odds_ratio": -1.628936529159546, "logits/chosen": -0.968933641910553, "logits/rejected": -0.8892368078231812, "logps/chosen": -3.7919082641601562, "logps/rejected": -2.769840717315674, "loss": 2.4572, "nll_loss": 2.2943363189697266, "rewards/accuracies": 0.125, "rewards/chosen": -0.3791908025741577, "rewards/margins": -0.10220672190189362, "rewards/rejected": -0.2769840657711029, "step": 321 }, { "epoch": 0.8815879534565366, "grad_norm": 3.676405191421509, "learning_rate": 9.558904109589042e-07, "log_odds_chosen": 0.3218432068824768, "log_odds_ratio": -0.7821369767189026, "logits/chosen": -0.9503791332244873, "logits/rejected": -0.9707844853401184, "logps/chosen": -2.9040005207061768, "logps/rejected": -3.194795608520508, "loss": 2.2419, "nll_loss": 2.1637344360351562, "rewards/accuracies": 0.625, "rewards/chosen": -0.29040005803108215, "rewards/margins": 0.029079511761665344, "rewards/rejected": -0.3194795548915863, "step": 322 }, { "epoch": 0.8843258042436687, "grad_norm": 3.288336753845215, "learning_rate": 9.557534246575342e-07, "log_odds_chosen": -0.5168519020080566, "log_odds_ratio": -1.0855258703231812, "logits/chosen": -0.8686452507972717, "logits/rejected": -0.8891621828079224, "logps/chosen": -2.62304949760437, "logps/rejected": -2.1226048469543457, "loss": 2.1927, "nll_loss": 2.0841407775878906, "rewards/accuracies": 0.375, "rewards/chosen": -0.26230496168136597, "rewards/margins": -0.0500444695353508, "rewards/rejected": -0.21226048469543457, "step": 323 }, { "epoch": 0.8870636550308009, "grad_norm": 3.926224946975708, "learning_rate": 9.556164383561644e-07, "log_odds_chosen": -0.251565545797348, "log_odds_ratio": -1.051140308380127, "logits/chosen": -0.9849074482917786, "logits/rejected": -0.9829403162002563, "logps/chosen": -3.7635838985443115, "logps/rejected": -3.5007660388946533, "loss": 2.287, "nll_loss": 2.1818861961364746, "rewards/accuracies": 0.5, "rewards/chosen": -0.37635838985443115, "rewards/margins": -0.02628178521990776, "rewards/rejected": -0.3500766158103943, "step": 324 }, { "epoch": 0.8898015058179329, "grad_norm": 4.070827484130859, "learning_rate": 9.554794520547946e-07, "log_odds_chosen": -1.0103167295455933, "log_odds_ratio": -1.5659475326538086, "logits/chosen": -0.977282702922821, "logits/rejected": -0.9332056045532227, "logps/chosen": -4.018362998962402, "logps/rejected": -3.0063562393188477, "loss": 2.448, "nll_loss": 2.2914321422576904, "rewards/accuracies": 0.375, "rewards/chosen": -0.4018363058567047, "rewards/margins": -0.10120067745447159, "rewards/rejected": -0.3006356358528137, "step": 325 }, { "epoch": 0.892539356605065, "grad_norm": 3.285356044769287, "learning_rate": 9.553424657534246e-07, "log_odds_chosen": 0.2537267208099365, "log_odds_ratio": -0.9038188457489014, "logits/chosen": -0.8356842994689941, "logits/rejected": -0.8537954688072205, "logps/chosen": -2.570725440979004, "logps/rejected": -2.7611961364746094, "loss": 2.1415, "nll_loss": 2.051159143447876, "rewards/accuracies": 0.5, "rewards/chosen": -0.2570725381374359, "rewards/margins": 0.019047075882554054, "rewards/rejected": -0.2761196196079254, "step": 326 }, { "epoch": 0.8952772073921971, "grad_norm": 3.4856486320495605, "learning_rate": 9.552054794520548e-07, "log_odds_chosen": 0.998336672782898, "log_odds_ratio": -0.5914853811264038, "logits/chosen": -1.0287704467773438, "logits/rejected": -1.0308818817138672, "logps/chosen": -2.8045451641082764, "logps/rejected": -3.7640743255615234, "loss": 2.1784, "nll_loss": 2.1192188262939453, "rewards/accuracies": 0.625, "rewards/chosen": -0.28045451641082764, "rewards/margins": 0.09595292061567307, "rewards/rejected": -0.3764074444770813, "step": 327 }, { "epoch": 0.8980150581793293, "grad_norm": 3.6684389114379883, "learning_rate": 9.55068493150685e-07, "log_odds_chosen": -0.45111167430877686, "log_odds_ratio": -1.0320720672607422, "logits/chosen": -1.0783822536468506, "logits/rejected": -1.0752280950546265, "logps/chosen": -3.0829215049743652, "logps/rejected": -2.643568277359009, "loss": 2.2702, "nll_loss": 2.167018413543701, "rewards/accuracies": 0.25, "rewards/chosen": -0.3082921504974365, "rewards/margins": -0.043935321271419525, "rewards/rejected": -0.2643568217754364, "step": 328 }, { "epoch": 0.9007529089664613, "grad_norm": 3.7145824432373047, "learning_rate": 9.54931506849315e-07, "log_odds_chosen": -0.0851292759180069, "log_odds_ratio": -0.8463960289955139, "logits/chosen": -0.852031409740448, "logits/rejected": -0.8736850619316101, "logps/chosen": -3.354851722717285, "logps/rejected": -3.2197206020355225, "loss": 2.1594, "nll_loss": 2.0747225284576416, "rewards/accuracies": 0.5, "rewards/chosen": -0.33548521995544434, "rewards/margins": -0.013513151556253433, "rewards/rejected": -0.3219720423221588, "step": 329 }, { "epoch": 0.9034907597535934, "grad_norm": 3.2499871253967285, "learning_rate": 9.547945205479452e-07, "log_odds_chosen": -0.060540542006492615, "log_odds_ratio": -0.8590317964553833, "logits/chosen": -0.9297218322753906, "logits/rejected": -0.9878373146057129, "logps/chosen": -2.3181614875793457, "logps/rejected": -2.2200465202331543, "loss": 2.1406, "nll_loss": 2.054689645767212, "rewards/accuracies": 0.625, "rewards/chosen": -0.23181617259979248, "rewards/margins": -0.009811514988541603, "rewards/rejected": -0.22200465202331543, "step": 330 }, { "epoch": 0.9062286105407256, "grad_norm": 3.2923545837402344, "learning_rate": 9.546575342465752e-07, "log_odds_chosen": 1.638718843460083, "log_odds_ratio": -0.487784743309021, "logits/chosen": -1.0609593391418457, "logits/rejected": -1.1338919401168823, "logps/chosen": -2.267561912536621, "logps/rejected": -3.838728904724121, "loss": 2.0762, "nll_loss": 2.0274200439453125, "rewards/accuracies": 0.625, "rewards/chosen": -0.22675620019435883, "rewards/margins": 0.15711671113967896, "rewards/rejected": -0.3838728964328766, "step": 331 }, { "epoch": 0.9089664613278576, "grad_norm": 3.293761968612671, "learning_rate": 9.545205479452054e-07, "log_odds_chosen": -0.33041316270828247, "log_odds_ratio": -1.126910924911499, "logits/chosen": -0.8677581548690796, "logits/rejected": -0.8390703201293945, "logps/chosen": -2.7497615814208984, "logps/rejected": -2.3919484615325928, "loss": 2.1631, "nll_loss": 2.0504205226898193, "rewards/accuracies": 0.75, "rewards/chosen": -0.27497613430023193, "rewards/margins": -0.03578127175569534, "rewards/rejected": -0.2391948699951172, "step": 332 }, { "epoch": 0.9117043121149897, "grad_norm": 4.321270942687988, "learning_rate": 9.543835616438356e-07, "log_odds_chosen": -0.8334506750106812, "log_odds_ratio": -1.3129137754440308, "logits/chosen": -0.9747281670570374, "logits/rejected": -0.9287815093994141, "logps/chosen": -3.6261253356933594, "logps/rejected": -2.8194541931152344, "loss": 2.3405, "nll_loss": 2.2092275619506836, "rewards/accuracies": 0.25, "rewards/chosen": -0.3626125454902649, "rewards/margins": -0.08066714555025101, "rewards/rejected": -0.2819454073905945, "step": 333 }, { "epoch": 0.9144421629021219, "grad_norm": 3.541214942932129, "learning_rate": 9.542465753424656e-07, "log_odds_chosen": -0.3163500726222992, "log_odds_ratio": -1.0538712739944458, "logits/chosen": -1.0301103591918945, "logits/rejected": -1.065943717956543, "logps/chosen": -2.4878315925598145, "logps/rejected": -2.161041498184204, "loss": 2.2008, "nll_loss": 2.0953941345214844, "rewards/accuracies": 0.375, "rewards/chosen": -0.2487831711769104, "rewards/margins": -0.03267902880907059, "rewards/rejected": -0.21610413491725922, "step": 334 }, { "epoch": 0.917180013689254, "grad_norm": 2.951991319656372, "learning_rate": 9.541095890410958e-07, "log_odds_chosen": 0.4360852837562561, "log_odds_ratio": -0.6752796173095703, "logits/chosen": -0.9550142288208008, "logits/rejected": -1.0161421298980713, "logps/chosen": -2.1254019737243652, "logps/rejected": -2.5028927326202393, "loss": 2.0531, "nll_loss": 1.9855841398239136, "rewards/accuracies": 0.75, "rewards/chosen": -0.21254019439220428, "rewards/margins": 0.03774908930063248, "rewards/rejected": -0.25028929114341736, "step": 335 }, { "epoch": 0.919917864476386, "grad_norm": 3.868727684020996, "learning_rate": 9.53972602739726e-07, "log_odds_chosen": -1.3987928628921509, "log_odds_ratio": -1.9034311771392822, "logits/chosen": -0.9939253330230713, "logits/rejected": -0.9482483863830566, "logps/chosen": -4.355731964111328, "logps/rejected": -2.9436450004577637, "loss": 2.4151, "nll_loss": 2.224790573120117, "rewards/accuracies": 0.25, "rewards/chosen": -0.43557319045066833, "rewards/margins": -0.14120864868164062, "rewards/rejected": -0.2943645119667053, "step": 336 }, { "epoch": 0.9226557152635181, "grad_norm": 3.9660744667053223, "learning_rate": 9.538356164383562e-07, "log_odds_chosen": -0.7977336645126343, "log_odds_ratio": -1.221786379814148, "logits/chosen": -1.1417953968048096, "logits/rejected": -0.9807686805725098, "logps/chosen": -3.2432379722595215, "logps/rejected": -2.52378249168396, "loss": 2.2834, "nll_loss": 2.1611905097961426, "rewards/accuracies": 0.25, "rewards/chosen": -0.32432377338409424, "rewards/margins": -0.07194554060697556, "rewards/rejected": -0.2523782551288605, "step": 337 }, { "epoch": 0.9253935660506503, "grad_norm": 3.9998936653137207, "learning_rate": 9.536986301369862e-07, "log_odds_chosen": -0.8993024826049805, "log_odds_ratio": -1.3577370643615723, "logits/chosen": -1.035569429397583, "logits/rejected": -0.9956073760986328, "logps/chosen": -3.3550920486450195, "logps/rejected": -2.4856362342834473, "loss": 2.2402, "nll_loss": 2.104449987411499, "rewards/accuracies": 0.25, "rewards/chosen": -0.33550921082496643, "rewards/margins": -0.08694557845592499, "rewards/rejected": -0.24856363236904144, "step": 338 }, { "epoch": 0.9281314168377823, "grad_norm": 3.719860076904297, "learning_rate": 9.535616438356165e-07, "log_odds_chosen": -0.6147215366363525, "log_odds_ratio": -1.1278477907180786, "logits/chosen": -1.0527381896972656, "logits/rejected": -1.018919825553894, "logps/chosen": -2.865095615386963, "logps/rejected": -2.2678892612457275, "loss": 2.2385, "nll_loss": 2.1257057189941406, "rewards/accuracies": 0.25, "rewards/chosen": -0.28650957345962524, "rewards/margins": -0.05972064658999443, "rewards/rejected": -0.22678892314434052, "step": 339 }, { "epoch": 0.9308692676249144, "grad_norm": 3.856879711151123, "learning_rate": 9.534246575342465e-07, "log_odds_chosen": -0.499253511428833, "log_odds_ratio": -1.1667089462280273, "logits/chosen": -0.9218663573265076, "logits/rejected": -0.8884499073028564, "logps/chosen": -2.621151924133301, "logps/rejected": -2.11588978767395, "loss": 2.1923, "nll_loss": 2.075638771057129, "rewards/accuracies": 0.5, "rewards/chosen": -0.2621152400970459, "rewards/margins": -0.05052623152732849, "rewards/rejected": -0.21158897876739502, "step": 340 }, { "epoch": 0.9336071184120466, "grad_norm": 3.5531134605407715, "learning_rate": 9.532876712328767e-07, "log_odds_chosen": 0.20705196261405945, "log_odds_ratio": -0.7753924131393433, "logits/chosen": -0.9313263893127441, "logits/rejected": -0.9417048692703247, "logps/chosen": -3.025799036026001, "logps/rejected": -3.2102348804473877, "loss": 2.1794, "nll_loss": 2.1018128395080566, "rewards/accuracies": 0.625, "rewards/chosen": -0.3025799095630646, "rewards/margins": 0.01844361051917076, "rewards/rejected": -0.32102352380752563, "step": 341 }, { "epoch": 0.9363449691991786, "grad_norm": 4.584530830383301, "learning_rate": 9.531506849315069e-07, "log_odds_chosen": -0.816495418548584, "log_odds_ratio": -1.372056245803833, "logits/chosen": -0.971707820892334, "logits/rejected": -0.9333087205886841, "logps/chosen": -3.8768672943115234, "logps/rejected": -3.064661979675293, "loss": 2.3887, "nll_loss": 2.2514495849609375, "rewards/accuracies": 0.375, "rewards/chosen": -0.38768675923347473, "rewards/margins": -0.08122054487466812, "rewards/rejected": -0.3064662218093872, "step": 342 }, { "epoch": 0.9390828199863107, "grad_norm": 4.4132843017578125, "learning_rate": 9.530136986301369e-07, "log_odds_chosen": -0.9038107991218567, "log_odds_ratio": -1.3882243633270264, "logits/chosen": -1.0647501945495605, "logits/rejected": -1.0224559307098389, "logps/chosen": -3.77844500541687, "logps/rejected": -2.876931667327881, "loss": 2.2889, "nll_loss": 2.1500916481018066, "rewards/accuracies": 0.25, "rewards/chosen": -0.37784451246261597, "rewards/margins": -0.09015130996704102, "rewards/rejected": -0.28769320249557495, "step": 343 }, { "epoch": 0.9418206707734429, "grad_norm": 4.293425559997559, "learning_rate": 9.528767123287671e-07, "log_odds_chosen": -0.1643763780593872, "log_odds_ratio": -0.9694095253944397, "logits/chosen": -0.9912148714065552, "logits/rejected": -0.9394681453704834, "logps/chosen": -3.5901565551757812, "logps/rejected": -3.446491003036499, "loss": 2.2739, "nll_loss": 2.17691707611084, "rewards/accuracies": 0.375, "rewards/chosen": -0.35901567339897156, "rewards/margins": -0.014366526156663895, "rewards/rejected": -0.34464913606643677, "step": 344 }, { "epoch": 0.944558521560575, "grad_norm": 3.6714537143707275, "learning_rate": 9.527397260273973e-07, "log_odds_chosen": -0.4479857087135315, "log_odds_ratio": -1.1284223794937134, "logits/chosen": -1.025650978088379, "logits/rejected": -1.009161353111267, "logps/chosen": -2.588649272918701, "logps/rejected": -2.182760715484619, "loss": 2.1544, "nll_loss": 2.0415587425231934, "rewards/accuracies": 0.25, "rewards/chosen": -0.2588649392127991, "rewards/margins": -0.040588848292827606, "rewards/rejected": -0.21827608346939087, "step": 345 }, { "epoch": 0.947296372347707, "grad_norm": 3.3013832569122314, "learning_rate": 9.526027397260274e-07, "log_odds_chosen": 0.20788829028606415, "log_odds_ratio": -0.68381667137146, "logits/chosen": -0.9804473519325256, "logits/rejected": -1.0148472785949707, "logps/chosen": -2.4741599559783936, "logps/rejected": -2.646944284439087, "loss": 2.1895, "nll_loss": 2.12113618850708, "rewards/accuracies": 0.625, "rewards/chosen": -0.24741598963737488, "rewards/margins": 0.017278410494327545, "rewards/rejected": -0.2646944224834442, "step": 346 }, { "epoch": 0.9500342231348392, "grad_norm": 3.869952440261841, "learning_rate": 9.524657534246575e-07, "log_odds_chosen": -1.1402997970581055, "log_odds_ratio": -1.659835696220398, "logits/chosen": -1.0769810676574707, "logits/rejected": -1.062768816947937, "logps/chosen": -3.9907867908477783, "logps/rejected": -2.8391215801239014, "loss": 2.2467, "nll_loss": 2.080735445022583, "rewards/accuracies": 0.375, "rewards/chosen": -0.39907872676849365, "rewards/margins": -0.1151665449142456, "rewards/rejected": -0.28391215205192566, "step": 347 }, { "epoch": 0.9527720739219713, "grad_norm": 3.3100745677948, "learning_rate": 9.523287671232876e-07, "log_odds_chosen": 0.1071280837059021, "log_odds_ratio": -0.7327206134796143, "logits/chosen": -0.8734397292137146, "logits/rejected": -0.9524971842765808, "logps/chosen": -2.4139628410339355, "logps/rejected": -2.461883068084717, "loss": 2.0988, "nll_loss": 2.025508403778076, "rewards/accuracies": 0.625, "rewards/chosen": -0.24139629304409027, "rewards/margins": 0.004792032763361931, "rewards/rejected": -0.24618831276893616, "step": 348 }, { "epoch": 0.9555099247091033, "grad_norm": 3.902740955352783, "learning_rate": 9.521917808219178e-07, "log_odds_chosen": -0.5043842792510986, "log_odds_ratio": -1.130832314491272, "logits/chosen": -1.0550615787506104, "logits/rejected": -1.0080970525741577, "logps/chosen": -3.37798810005188, "logps/rejected": -2.870112895965576, "loss": 2.2274, "nll_loss": 2.1142845153808594, "rewards/accuracies": 0.375, "rewards/chosen": -0.3377987742424011, "rewards/margins": -0.05078751593828201, "rewards/rejected": -0.2870112657546997, "step": 349 }, { "epoch": 0.9582477754962354, "grad_norm": 3.505021095275879, "learning_rate": 9.520547945205479e-07, "log_odds_chosen": -0.5869097709655762, "log_odds_ratio": -1.212744951248169, "logits/chosen": -0.9061817526817322, "logits/rejected": -0.8858035802841187, "logps/chosen": -2.8561604022979736, "logps/rejected": -2.2814626693725586, "loss": 2.1477, "nll_loss": 2.0263938903808594, "rewards/accuracies": 0.5, "rewards/chosen": -0.28561604022979736, "rewards/margins": -0.057469770312309265, "rewards/rejected": -0.2281462550163269, "step": 350 }, { "epoch": 0.9609856262833676, "grad_norm": 4.438052654266357, "learning_rate": 9.51917808219178e-07, "log_odds_chosen": -1.026200294494629, "log_odds_ratio": -1.4746241569519043, "logits/chosen": -1.024693250656128, "logits/rejected": -0.9618432521820068, "logps/chosen": -4.155359745025635, "logps/rejected": -3.155189275741577, "loss": 2.296, "nll_loss": 2.148555040359497, "rewards/accuracies": 0.125, "rewards/chosen": -0.41553595662117004, "rewards/margins": -0.1000170037150383, "rewards/rejected": -0.31551897525787354, "step": 351 }, { "epoch": 0.9637234770704997, "grad_norm": 4.5305657386779785, "learning_rate": 9.517808219178082e-07, "log_odds_chosen": -1.5453792810440063, "log_odds_ratio": -1.8200536966323853, "logits/chosen": -1.0107040405273438, "logits/rejected": -0.9391548037528992, "logps/chosen": -4.21239709854126, "logps/rejected": -2.7086329460144043, "loss": 2.3299, "nll_loss": 2.147873878479004, "rewards/accuracies": 0.125, "rewards/chosen": -0.4212397336959839, "rewards/margins": -0.15037639439105988, "rewards/rejected": -0.2708633244037628, "step": 352 }, { "epoch": 0.9664613278576317, "grad_norm": 3.1848652362823486, "learning_rate": 9.516438356164384e-07, "log_odds_chosen": 0.3454008102416992, "log_odds_ratio": -0.6300203800201416, "logits/chosen": -0.8391733765602112, "logits/rejected": -0.9148231744766235, "logps/chosen": -2.328946590423584, "logps/rejected": -2.6082868576049805, "loss": 2.0529, "nll_loss": 1.989900827407837, "rewards/accuracies": 0.5, "rewards/chosen": -0.2328946590423584, "rewards/margins": 0.02793404832482338, "rewards/rejected": -0.26082873344421387, "step": 353 }, { "epoch": 0.9691991786447639, "grad_norm": 3.091400146484375, "learning_rate": 9.515068493150684e-07, "log_odds_chosen": 0.21595516800880432, "log_odds_ratio": -0.6254602074623108, "logits/chosen": -0.9634081721305847, "logits/rejected": -0.9816166162490845, "logps/chosen": -2.038471221923828, "logps/rejected": -2.2030258178710938, "loss": 1.9365, "nll_loss": 1.873962163925171, "rewards/accuracies": 0.75, "rewards/chosen": -0.20384712517261505, "rewards/margins": 0.01645546779036522, "rewards/rejected": -0.22030259668827057, "step": 354 }, { "epoch": 0.971937029431896, "grad_norm": 3.8517212867736816, "learning_rate": 9.513698630136986e-07, "log_odds_chosen": -0.6561025381088257, "log_odds_ratio": -1.2334685325622559, "logits/chosen": -1.0397520065307617, "logits/rejected": -1.0058008432388306, "logps/chosen": -3.332503318786621, "logps/rejected": -2.6611504554748535, "loss": 2.2398, "nll_loss": 2.1164231300354004, "rewards/accuracies": 0.5, "rewards/chosen": -0.33325034379959106, "rewards/margins": -0.0671352744102478, "rewards/rejected": -0.26611506938934326, "step": 355 }, { "epoch": 0.974674880219028, "grad_norm": 3.0079948902130127, "learning_rate": 9.512328767123288e-07, "log_odds_chosen": 0.6018101572990417, "log_odds_ratio": -0.7220184206962585, "logits/chosen": -0.7875670790672302, "logits/rejected": -0.9131230711936951, "logps/chosen": -2.2596235275268555, "logps/rejected": -2.808929204940796, "loss": 2.019, "nll_loss": 1.9468281269073486, "rewards/accuracies": 0.75, "rewards/chosen": -0.2259623408317566, "rewards/margins": 0.054930560290813446, "rewards/rejected": -0.28089290857315063, "step": 356 }, { "epoch": 0.9774127310061602, "grad_norm": 3.853184938430786, "learning_rate": 9.510958904109588e-07, "log_odds_chosen": -0.15490587055683136, "log_odds_ratio": -0.8312073945999146, "logits/chosen": -0.9931520223617554, "logits/rejected": -0.9721630811691284, "logps/chosen": -3.297973394393921, "logps/rejected": -3.1406943798065186, "loss": 2.1907, "nll_loss": 2.1075730323791504, "rewards/accuracies": 0.625, "rewards/chosen": -0.32979732751846313, "rewards/margins": -0.015727905556559563, "rewards/rejected": -0.3140694499015808, "step": 357 }, { "epoch": 0.9801505817932923, "grad_norm": 3.719359874725342, "learning_rate": 9.50958904109589e-07, "log_odds_chosen": 0.5166378021240234, "log_odds_ratio": -0.6443429589271545, "logits/chosen": -1.010141372680664, "logits/rejected": -1.0059983730316162, "logps/chosen": -2.8856382369995117, "logps/rejected": -3.2936177253723145, "loss": 2.1539, "nll_loss": 2.0895042419433594, "rewards/accuracies": 0.5, "rewards/chosen": -0.2885638475418091, "rewards/margins": 0.040797922760248184, "rewards/rejected": -0.32936176657676697, "step": 358 }, { "epoch": 0.9828884325804244, "grad_norm": 3.927682638168335, "learning_rate": 9.508219178082192e-07, "log_odds_chosen": -0.19584378600120544, "log_odds_ratio": -0.9996610879898071, "logits/chosen": -1.004349946975708, "logits/rejected": -0.9821492433547974, "logps/chosen": -3.1190567016601562, "logps/rejected": -2.9235172271728516, "loss": 2.2078, "nll_loss": 2.1078715324401855, "rewards/accuracies": 0.5, "rewards/chosen": -0.3119056820869446, "rewards/margins": -0.019553938880562782, "rewards/rejected": -0.29235172271728516, "step": 359 }, { "epoch": 0.9856262833675564, "grad_norm": 3.836886405944824, "learning_rate": 9.506849315068493e-07, "log_odds_chosen": -0.4785364270210266, "log_odds_ratio": -1.1126031875610352, "logits/chosen": -0.9639312028884888, "logits/rejected": -0.9553571939468384, "logps/chosen": -3.150930404663086, "logps/rejected": -2.682584762573242, "loss": 2.1783, "nll_loss": 2.0670828819274902, "rewards/accuracies": 0.375, "rewards/chosen": -0.3150930404663086, "rewards/margins": -0.046834588050842285, "rewards/rejected": -0.2682584524154663, "step": 360 }, { "epoch": 0.9883641341546886, "grad_norm": 3.192873239517212, "learning_rate": 9.505479452054794e-07, "log_odds_chosen": 0.5065741539001465, "log_odds_ratio": -0.5783319473266602, "logits/chosen": -0.9699341058731079, "logits/rejected": -0.9700990319252014, "logps/chosen": -2.5879788398742676, "logps/rejected": -3.0487942695617676, "loss": 2.0832, "nll_loss": 2.0253188610076904, "rewards/accuracies": 0.625, "rewards/chosen": -0.25879788398742676, "rewards/margins": 0.046081557869911194, "rewards/rejected": -0.30487942695617676, "step": 361 }, { "epoch": 0.9911019849418207, "grad_norm": 3.1174604892730713, "learning_rate": 9.504109589041095e-07, "log_odds_chosen": 0.15737605094909668, "log_odds_ratio": -0.7173986434936523, "logits/chosen": -1.0978518724441528, "logits/rejected": -1.1893243789672852, "logps/chosen": -2.0858192443847656, "logps/rejected": -2.1826257705688477, "loss": 2.0188, "nll_loss": 1.9470933675765991, "rewards/accuracies": 0.625, "rewards/chosen": -0.20858190953731537, "rewards/margins": 0.009680674411356449, "rewards/rejected": -0.21826256811618805, "step": 362 }, { "epoch": 0.9938398357289527, "grad_norm": 3.7923941612243652, "learning_rate": 9.502739726027397e-07, "log_odds_chosen": -0.18982191383838654, "log_odds_ratio": -0.901931881904602, "logits/chosen": -0.9689269065856934, "logits/rejected": -0.9554093480110168, "logps/chosen": -2.9671263694763184, "logps/rejected": -2.735927104949951, "loss": 2.1422, "nll_loss": 2.051989793777466, "rewards/accuracies": 0.375, "rewards/chosen": -0.29671263694763184, "rewards/margins": -0.023119905963540077, "rewards/rejected": -0.2735927402973175, "step": 363 }, { "epoch": 0.9965776865160849, "grad_norm": 3.0514273643493652, "learning_rate": 9.501369863013698e-07, "log_odds_chosen": 0.26484984159469604, "log_odds_ratio": -0.6830554604530334, "logits/chosen": -0.8357221484184265, "logits/rejected": -0.8955196142196655, "logps/chosen": -2.207141399383545, "logps/rejected": -2.4141809940338135, "loss": 1.9702, "nll_loss": 1.901855230331421, "rewards/accuracies": 0.75, "rewards/chosen": -0.22071415185928345, "rewards/margins": 0.020703954622149467, "rewards/rejected": -0.24141810834407806, "step": 364 }, { "epoch": 0.999315537303217, "grad_norm": 3.7870125770568848, "learning_rate": 9.499999999999999e-07, "log_odds_chosen": -0.1617327630519867, "log_odds_ratio": -0.8116494417190552, "logits/chosen": -1.039559006690979, "logits/rejected": -1.025919795036316, "logps/chosen": -2.967162609100342, "logps/rejected": -2.810795307159424, "loss": 2.1617, "nll_loss": 2.080522060394287, "rewards/accuracies": 0.5, "rewards/chosen": -0.29671627283096313, "rewards/margins": -0.01563674956560135, "rewards/rejected": -0.2810795307159424, "step": 365 }, { "epoch": 1.002053388090349, "grad_norm": 3.908010721206665, "learning_rate": 9.498630136986301e-07, "log_odds_chosen": -0.4259549379348755, "log_odds_ratio": -1.4375991821289062, "logits/chosen": -0.9431150555610657, "logits/rejected": -1.0079567432403564, "logps/chosen": -4.06486177444458, "logps/rejected": -3.6006085872650146, "loss": 2.1628, "nll_loss": 2.019040822982788, "rewards/accuracies": 0.375, "rewards/chosen": -0.4064861834049225, "rewards/margins": -0.046425290405750275, "rewards/rejected": -0.3600608706474304, "step": 366 }, { "epoch": 1.0047912388774811, "grad_norm": 4.201023578643799, "learning_rate": 9.497260273972603e-07, "log_odds_chosen": -0.8010306358337402, "log_odds_ratio": -1.4922723770141602, "logits/chosen": -0.9956115484237671, "logits/rejected": -0.978934109210968, "logps/chosen": -3.8007633686065674, "logps/rejected": -2.9469687938690186, "loss": 2.1399, "nll_loss": 1.9907176494598389, "rewards/accuracies": 0.375, "rewards/chosen": -0.3800763487815857, "rewards/margins": -0.0853794515132904, "rewards/rejected": -0.2946968972682953, "step": 367 }, { "epoch": 1.0075290896646132, "grad_norm": 3.736870527267456, "learning_rate": 9.495890410958903e-07, "log_odds_chosen": 0.5850272178649902, "log_odds_ratio": -0.8317313194274902, "logits/chosen": -0.9430620074272156, "logits/rejected": -1.0112955570220947, "logps/chosen": -2.6789774894714355, "logps/rejected": -3.1864800453186035, "loss": 2.0397, "nll_loss": 1.9565280675888062, "rewards/accuracies": 0.75, "rewards/chosen": -0.26789775490760803, "rewards/margins": 0.05075027793645859, "rewards/rejected": -0.31864801049232483, "step": 368 }, { "epoch": 1.0102669404517455, "grad_norm": 4.414582252502441, "learning_rate": 9.494520547945205e-07, "log_odds_chosen": -1.0586307048797607, "log_odds_ratio": -1.4308829307556152, "logits/chosen": -0.9582830667495728, "logits/rejected": -0.8856099843978882, "logps/chosen": -3.66981840133667, "logps/rejected": -2.6688623428344727, "loss": 2.3811, "nll_loss": 2.238022804260254, "rewards/accuracies": 0.25, "rewards/chosen": -0.3669818341732025, "rewards/margins": -0.10009559988975525, "rewards/rejected": -0.26688623428344727, "step": 369 }, { "epoch": 1.0130047912388775, "grad_norm": 3.7266623973846436, "learning_rate": 9.493150684931507e-07, "log_odds_chosen": -0.8826406002044678, "log_odds_ratio": -1.4592194557189941, "logits/chosen": -0.9134376645088196, "logits/rejected": -0.8992810249328613, "logps/chosen": -3.4396157264709473, "logps/rejected": -2.5734975337982178, "loss": 2.1853, "nll_loss": 2.0393295288085938, "rewards/accuracies": 0.375, "rewards/chosen": -0.34396159648895264, "rewards/margins": -0.08661181479692459, "rewards/rejected": -0.25734975934028625, "step": 370 }, { "epoch": 1.0157426420260096, "grad_norm": 3.9653799533843994, "learning_rate": 9.491780821917807e-07, "log_odds_chosen": -0.23574119806289673, "log_odds_ratio": -0.9115504026412964, "logits/chosen": -0.9855255484580994, "logits/rejected": -0.947848916053772, "logps/chosen": -3.214262008666992, "logps/rejected": -2.974229097366333, "loss": 2.1405, "nll_loss": 2.049358606338501, "rewards/accuracies": 0.375, "rewards/chosen": -0.32142624258995056, "rewards/margins": -0.024003321304917336, "rewards/rejected": -0.2974228858947754, "step": 371 }, { "epoch": 1.0184804928131417, "grad_norm": 3.117541790008545, "learning_rate": 9.490410958904109e-07, "log_odds_chosen": -0.07232921570539474, "log_odds_ratio": -0.8086071014404297, "logits/chosen": -0.8331071138381958, "logits/rejected": -0.8719759583473206, "logps/chosen": -2.0486204624176025, "logps/rejected": -1.94162917137146, "loss": 2.0105, "nll_loss": 1.9296256303787231, "rewards/accuracies": 0.625, "rewards/chosen": -0.20486204326152802, "rewards/margins": -0.010699130594730377, "rewards/rejected": -0.19416290521621704, "step": 372 }, { "epoch": 1.0212183436002737, "grad_norm": 4.397269248962402, "learning_rate": 9.489041095890411e-07, "log_odds_chosen": -2.1199216842651367, "log_odds_ratio": -2.411006212234497, "logits/chosen": -1.006920337677002, "logits/rejected": -0.8455904126167297, "logps/chosen": -4.03196382522583, "logps/rejected": -2.0318777561187744, "loss": 2.321, "nll_loss": 2.0798826217651367, "rewards/accuracies": 0.25, "rewards/chosen": -0.4031963646411896, "rewards/margins": -0.2000085860490799, "rewards/rejected": -0.20318779349327087, "step": 373 }, { "epoch": 1.0239561943874058, "grad_norm": 3.4017302989959717, "learning_rate": 9.487671232876712e-07, "log_odds_chosen": 0.14323006570339203, "log_odds_ratio": -0.8191068768501282, "logits/chosen": -0.8819147348403931, "logits/rejected": -0.9127877354621887, "logps/chosen": -2.3985958099365234, "logps/rejected": -2.472334861755371, "loss": 2.0359, "nll_loss": 1.953957200050354, "rewards/accuracies": 0.625, "rewards/chosen": -0.23985958099365234, "rewards/margins": 0.007373906672000885, "rewards/rejected": -0.24723349511623383, "step": 374 }, { "epoch": 1.0266940451745379, "grad_norm": 3.8670356273651123, "learning_rate": 9.486301369863013e-07, "log_odds_chosen": -0.7138810157775879, "log_odds_ratio": -1.3300950527191162, "logits/chosen": -1.0095854997634888, "logits/rejected": -0.9704385995864868, "logps/chosen": -3.3908445835113525, "logps/rejected": -2.7207956314086914, "loss": 2.2216, "nll_loss": 2.088590383529663, "rewards/accuracies": 0.5, "rewards/chosen": -0.3390844464302063, "rewards/margins": -0.06700490415096283, "rewards/rejected": -0.27207955718040466, "step": 375 }, { "epoch": 1.0294318959616702, "grad_norm": 3.3740227222442627, "learning_rate": 9.484931506849314e-07, "log_odds_chosen": 0.04887949675321579, "log_odds_ratio": -0.7626360654830933, "logits/chosen": -0.9311819076538086, "logits/rejected": -0.9491267800331116, "logps/chosen": -2.8983821868896484, "logps/rejected": -2.9008822441101074, "loss": 2.0609, "nll_loss": 1.9846270084381104, "rewards/accuracies": 0.375, "rewards/chosen": -0.2898382544517517, "rewards/margins": 0.00024997908622026443, "rewards/rejected": -0.2900881767272949, "step": 376 }, { "epoch": 1.0321697467488022, "grad_norm": 3.8844335079193115, "learning_rate": 9.483561643835616e-07, "log_odds_chosen": 0.005216449499130249, "log_odds_ratio": -1.0556920766830444, "logits/chosen": -0.9972928762435913, "logits/rejected": -1.0336523056030273, "logps/chosen": -3.0710742473602295, "logps/rejected": -3.039721965789795, "loss": 2.129, "nll_loss": 2.023468017578125, "rewards/accuracies": 0.5, "rewards/chosen": -0.30710741877555847, "rewards/margins": -0.0031352192163467407, "rewards/rejected": -0.30397218465805054, "step": 377 }, { "epoch": 1.0349075975359343, "grad_norm": 3.8167197704315186, "learning_rate": 9.482191780821917e-07, "log_odds_chosen": -0.575190544128418, "log_odds_ratio": -1.108001708984375, "logits/chosen": -0.9410148859024048, "logits/rejected": -0.942301332950592, "logps/chosen": -3.138530731201172, "logps/rejected": -2.56607723236084, "loss": 2.1299, "nll_loss": 2.019099712371826, "rewards/accuracies": 0.375, "rewards/chosen": -0.31385308504104614, "rewards/margins": -0.05724538117647171, "rewards/rejected": -0.25660771131515503, "step": 378 }, { "epoch": 1.0376454483230664, "grad_norm": 3.636759042739868, "learning_rate": 9.480821917808218e-07, "log_odds_chosen": -0.38384243845939636, "log_odds_ratio": -1.0192981958389282, "logits/chosen": -0.8925155401229858, "logits/rejected": -0.9095913171768188, "logps/chosen": -2.61468505859375, "logps/rejected": -2.2154338359832764, "loss": 2.0307, "nll_loss": 1.9288175106048584, "rewards/accuracies": 0.375, "rewards/chosen": -0.2614684998989105, "rewards/margins": -0.039925120770931244, "rewards/rejected": -0.22154340147972107, "step": 379 }, { "epoch": 1.0403832991101984, "grad_norm": 3.6204590797424316, "learning_rate": 9.47945205479452e-07, "log_odds_chosen": 0.032463669776916504, "log_odds_ratio": -1.1744623184204102, "logits/chosen": -0.9878273010253906, "logits/rejected": -1.0412724018096924, "logps/chosen": -3.722010612487793, "logps/rejected": -3.6987783908843994, "loss": 2.0879, "nll_loss": 1.970456838607788, "rewards/accuracies": 0.625, "rewards/chosen": -0.3722010850906372, "rewards/margins": -0.002323240041732788, "rewards/rejected": -0.3698778450489044, "step": 380 }, { "epoch": 1.0431211498973305, "grad_norm": 3.6169686317443848, "learning_rate": 9.478082191780822e-07, "log_odds_chosen": 0.032939404249191284, "log_odds_ratio": -0.7761796712875366, "logits/chosen": -0.9411478638648987, "logits/rejected": -0.981238603591919, "logps/chosen": -2.590812921524048, "logps/rejected": -2.5775537490844727, "loss": 2.0031, "nll_loss": 1.9254425764083862, "rewards/accuracies": 0.625, "rewards/chosen": -0.25908130407333374, "rewards/margins": -0.001325925812125206, "rewards/rejected": -0.2577553689479828, "step": 381 }, { "epoch": 1.0458590006844628, "grad_norm": 3.280975580215454, "learning_rate": 9.476712328767122e-07, "log_odds_chosen": 0.02846594899892807, "log_odds_ratio": -0.8418194055557251, "logits/chosen": -0.912202775478363, "logits/rejected": -0.9368677139282227, "logps/chosen": -2.507094383239746, "logps/rejected": -2.476456880569458, "loss": 2.0018, "nll_loss": 1.917654275894165, "rewards/accuracies": 0.625, "rewards/chosen": -0.2507094442844391, "rewards/margins": -0.003063756041228771, "rewards/rejected": -0.24764567613601685, "step": 382 }, { "epoch": 1.0485968514715949, "grad_norm": 3.3149287700653076, "learning_rate": 9.475342465753424e-07, "log_odds_chosen": 0.4258632957935333, "log_odds_ratio": -0.5095553398132324, "logits/chosen": -0.9425128698348999, "logits/rejected": -0.9972410202026367, "logps/chosen": -2.141939640045166, "logps/rejected": -2.5080697536468506, "loss": 1.9459, "nll_loss": 1.8949253559112549, "rewards/accuracies": 1.0, "rewards/chosen": -0.21419396996498108, "rewards/margins": 0.03661303222179413, "rewards/rejected": -0.250806987285614, "step": 383 }, { "epoch": 1.051334702258727, "grad_norm": 3.9570152759552, "learning_rate": 9.473972602739726e-07, "log_odds_chosen": 0.20773723721504211, "log_odds_ratio": -0.9492347240447998, "logits/chosen": -0.9029123783111572, "logits/rejected": -0.9480992555618286, "logps/chosen": -3.1678853034973145, "logps/rejected": -3.3412623405456543, "loss": 2.0846, "nll_loss": 1.9896306991577148, "rewards/accuracies": 0.5, "rewards/chosen": -0.31678852438926697, "rewards/margins": 0.017337728291749954, "rewards/rejected": -0.3341262638568878, "step": 384 }, { "epoch": 1.054072553045859, "grad_norm": 3.433403253555298, "learning_rate": 9.472602739726026e-07, "log_odds_chosen": -0.3207973837852478, "log_odds_ratio": -1.1187043190002441, "logits/chosen": -1.005059003829956, "logits/rejected": -1.0015991926193237, "logps/chosen": -2.8285281658172607, "logps/rejected": -2.4728026390075684, "loss": 2.0857, "nll_loss": 1.9737972021102905, "rewards/accuracies": 0.625, "rewards/chosen": -0.28285282850265503, "rewards/margins": -0.03557258099317551, "rewards/rejected": -0.24728024005889893, "step": 385 }, { "epoch": 1.056810403832991, "grad_norm": 3.4105615615844727, "learning_rate": 9.471232876712328e-07, "log_odds_chosen": 0.5931211709976196, "log_odds_ratio": -0.6687047481536865, "logits/chosen": -0.9576898813247681, "logits/rejected": -1.0031282901763916, "logps/chosen": -2.3056344985961914, "logps/rejected": -2.7889351844787598, "loss": 1.9549, "nll_loss": 1.8880497217178345, "rewards/accuracies": 0.625, "rewards/chosen": -0.2305634468793869, "rewards/margins": 0.048330068588256836, "rewards/rejected": -0.27889353036880493, "step": 386 }, { "epoch": 1.0595482546201231, "grad_norm": 4.292815685272217, "learning_rate": 9.46986301369863e-07, "log_odds_chosen": -1.871313452720642, "log_odds_ratio": -2.639655828475952, "logits/chosen": -0.9061291217803955, "logits/rejected": -0.9337098002433777, "logps/chosen": -4.506789207458496, "logps/rejected": -2.628279685974121, "loss": 2.1356, "nll_loss": 1.8716206550598145, "rewards/accuracies": 0.5, "rewards/chosen": -0.4506789445877075, "rewards/margins": -0.1878509521484375, "rewards/rejected": -0.26282799243927, "step": 387 }, { "epoch": 1.0622861054072552, "grad_norm": 3.6116631031036377, "learning_rate": 9.46849315068493e-07, "log_odds_chosen": -0.08445534110069275, "log_odds_ratio": -0.9124117493629456, "logits/chosen": -1.0297572612762451, "logits/rejected": -1.059419870376587, "logps/chosen": -3.0396533012390137, "logps/rejected": -2.931166172027588, "loss": 2.0597, "nll_loss": 1.9684345722198486, "rewards/accuracies": 0.375, "rewards/chosen": -0.30396535992622375, "rewards/margins": -0.010848725214600563, "rewards/rejected": -0.29311662912368774, "step": 388 }, { "epoch": 1.0650239561943875, "grad_norm": 3.9341485500335693, "learning_rate": 9.467123287671232e-07, "log_odds_chosen": -0.49558788537979126, "log_odds_ratio": -1.0385644435882568, "logits/chosen": -1.0411758422851562, "logits/rejected": -0.9961960911750793, "logps/chosen": -3.195068120956421, "logps/rejected": -2.715912342071533, "loss": 2.1866, "nll_loss": 2.0826990604400635, "rewards/accuracies": 0.5, "rewards/chosen": -0.31950682401657104, "rewards/margins": -0.047915585339069366, "rewards/rejected": -0.2715912461280823, "step": 389 }, { "epoch": 1.0677618069815196, "grad_norm": 3.884425640106201, "learning_rate": 9.465753424657534e-07, "log_odds_chosen": -0.1741136610507965, "log_odds_ratio": -0.9085536599159241, "logits/chosen": -0.9773316979408264, "logits/rejected": -0.9710670113563538, "logps/chosen": -3.016343832015991, "logps/rejected": -2.8443856239318848, "loss": 2.1903, "nll_loss": 2.0994863510131836, "rewards/accuracies": 0.5, "rewards/chosen": -0.30163440108299255, "rewards/margins": -0.017195837572216988, "rewards/rejected": -0.2844385504722595, "step": 390 }, { "epoch": 1.0704996577686516, "grad_norm": 3.689635753631592, "learning_rate": 9.464383561643835e-07, "log_odds_chosen": -1.2108596563339233, "log_odds_ratio": -1.8195500373840332, "logits/chosen": -1.0018954277038574, "logits/rejected": -1.0537962913513184, "logps/chosen": -3.781383991241455, "logps/rejected": -2.5205960273742676, "loss": 2.1266, "nll_loss": 1.944603443145752, "rewards/accuracies": 0.5, "rewards/chosen": -0.37813839316368103, "rewards/margins": -0.1260787844657898, "rewards/rejected": -0.25205957889556885, "step": 391 }, { "epoch": 1.0732375085557837, "grad_norm": 3.0370967388153076, "learning_rate": 9.463013698630136e-07, "log_odds_chosen": 0.20813114941120148, "log_odds_ratio": -0.6408668756484985, "logits/chosen": -0.8338201642036438, "logits/rejected": -0.8875648975372314, "logps/chosen": -2.151823043823242, "logps/rejected": -2.2975945472717285, "loss": 1.9113, "nll_loss": 1.8472015857696533, "rewards/accuracies": 0.75, "rewards/chosen": -0.2151823192834854, "rewards/margins": 0.014577141031622887, "rewards/rejected": -0.22975945472717285, "step": 392 }, { "epoch": 1.0759753593429158, "grad_norm": 3.9163572788238525, "learning_rate": 9.461643835616437e-07, "log_odds_chosen": -0.9380320310592651, "log_odds_ratio": -1.5112032890319824, "logits/chosen": -0.9210330247879028, "logits/rejected": -0.9030933380126953, "logps/chosen": -3.471928596496582, "logps/rejected": -2.550477981567383, "loss": 2.1152, "nll_loss": 1.964032530784607, "rewards/accuracies": 0.25, "rewards/chosen": -0.3471928536891937, "rewards/margins": -0.09214504063129425, "rewards/rejected": -0.25504782795906067, "step": 393 }, { "epoch": 1.0787132101300478, "grad_norm": 3.8041932582855225, "learning_rate": 9.460273972602739e-07, "log_odds_chosen": -0.23155894875526428, "log_odds_ratio": -0.9503270387649536, "logits/chosen": -0.9965782761573792, "logits/rejected": -0.9708678126335144, "logps/chosen": -2.8664495944976807, "logps/rejected": -2.5845279693603516, "loss": 1.9596, "nll_loss": 1.8645455837249756, "rewards/accuracies": 0.625, "rewards/chosen": -0.28664496541023254, "rewards/margins": -0.028192177414894104, "rewards/rejected": -0.25845280289649963, "step": 394 }, { "epoch": 1.0814510609171801, "grad_norm": 3.369856357574463, "learning_rate": 9.45890410958904e-07, "log_odds_chosen": 0.3109264075756073, "log_odds_ratio": -0.6831945180892944, "logits/chosen": -0.8902217149734497, "logits/rejected": -0.9858326315879822, "logps/chosen": -2.4858086109161377, "logps/rejected": -2.7615184783935547, "loss": 1.9783, "nll_loss": 1.9100075960159302, "rewards/accuracies": 0.5, "rewards/chosen": -0.24858085811138153, "rewards/margins": 0.027570974081754684, "rewards/rejected": -0.2761518359184265, "step": 395 }, { "epoch": 1.0841889117043122, "grad_norm": 3.723235607147217, "learning_rate": 9.457534246575341e-07, "log_odds_chosen": -0.7815985679626465, "log_odds_ratio": -1.4172755479812622, "logits/chosen": -1.0307778120040894, "logits/rejected": -1.0444658994674683, "logps/chosen": -3.435624361038208, "logps/rejected": -2.6556310653686523, "loss": 2.2102, "nll_loss": 2.0684728622436523, "rewards/accuracies": 0.5, "rewards/chosen": -0.34356242418289185, "rewards/margins": -0.07799933105707169, "rewards/rejected": -0.26556310057640076, "step": 396 }, { "epoch": 1.0869267624914443, "grad_norm": 3.6747026443481445, "learning_rate": 9.456164383561643e-07, "log_odds_chosen": 0.42897453904151917, "log_odds_ratio": -0.9162967205047607, "logits/chosen": -0.9370048642158508, "logits/rejected": -1.0456459522247314, "logps/chosen": -2.709646463394165, "logps/rejected": -3.08878493309021, "loss": 1.9883, "nll_loss": 1.8966617584228516, "rewards/accuracies": 0.625, "rewards/chosen": -0.2709646224975586, "rewards/margins": 0.03791387379169464, "rewards/rejected": -0.3088785409927368, "step": 397 }, { "epoch": 1.0896646132785763, "grad_norm": 3.266479730606079, "learning_rate": 9.454794520547946e-07, "log_odds_chosen": 0.5101697444915771, "log_odds_ratio": -0.668778657913208, "logits/chosen": -0.9106509685516357, "logits/rejected": -0.9583531618118286, "logps/chosen": -2.1872618198394775, "logps/rejected": -2.618866205215454, "loss": 2.0051, "nll_loss": 1.9382548332214355, "rewards/accuracies": 0.625, "rewards/chosen": -0.21872621774673462, "rewards/margins": 0.043160419911146164, "rewards/rejected": -0.2618866264820099, "step": 398 }, { "epoch": 1.0924024640657084, "grad_norm": 4.229532241821289, "learning_rate": 9.453424657534245e-07, "log_odds_chosen": -0.9155111908912659, "log_odds_ratio": -1.4836469888687134, "logits/chosen": -0.9647924900054932, "logits/rejected": -0.9734218120574951, "logps/chosen": -3.7888681888580322, "logps/rejected": -2.884831428527832, "loss": 2.1253, "nll_loss": 1.976938247680664, "rewards/accuracies": 0.25, "rewards/chosen": -0.3788868188858032, "rewards/margins": -0.09040370583534241, "rewards/rejected": -0.2884831428527832, "step": 399 }, { "epoch": 1.0951403148528405, "grad_norm": 2.924044609069824, "learning_rate": 9.452054794520548e-07, "log_odds_chosen": 0.7096055150032043, "log_odds_ratio": -0.4875897169113159, "logits/chosen": -0.8106814622879028, "logits/rejected": -0.9009420871734619, "logps/chosen": -2.08493709564209, "logps/rejected": -2.7391128540039062, "loss": 1.8955, "nll_loss": 1.846717119216919, "rewards/accuracies": 0.75, "rewards/chosen": -0.2084936946630478, "rewards/margins": 0.0654175728559494, "rewards/rejected": -0.2739112675189972, "step": 400 }, { "epoch": 1.0978781656399725, "grad_norm": 4.297476768493652, "learning_rate": 9.45068493150685e-07, "log_odds_chosen": -0.6515663266181946, "log_odds_ratio": -1.1637338399887085, "logits/chosen": -0.9667490124702454, "logits/rejected": -0.8798996210098267, "logps/chosen": -2.9125266075134277, "logps/rejected": -2.324878215789795, "loss": 2.1132, "nll_loss": 1.99678635597229, "rewards/accuracies": 0.375, "rewards/chosen": -0.29125267267227173, "rewards/margins": -0.05876484885811806, "rewards/rejected": -0.23248782753944397, "step": 401 }, { "epoch": 1.1006160164271048, "grad_norm": 4.134376049041748, "learning_rate": 9.44931506849315e-07, "log_odds_chosen": -0.9078437685966492, "log_odds_ratio": -1.5159587860107422, "logits/chosen": -0.9573487043380737, "logits/rejected": -0.920662522315979, "logps/chosen": -3.8848228454589844, "logps/rejected": -2.977454900741577, "loss": 2.1845, "nll_loss": 2.032939910888672, "rewards/accuracies": 0.5, "rewards/chosen": -0.38848230242729187, "rewards/margins": -0.09073685109615326, "rewards/rejected": -0.2977454662322998, "step": 402 }, { "epoch": 1.103353867214237, "grad_norm": 4.541252136230469, "learning_rate": 9.447945205479452e-07, "log_odds_chosen": -0.26630276441574097, "log_odds_ratio": -1.0444928407669067, "logits/chosen": -0.8899925947189331, "logits/rejected": -0.9179791212081909, "logps/chosen": -3.5806965827941895, "logps/rejected": -3.292022943496704, "loss": 2.0288, "nll_loss": 1.924393892288208, "rewards/accuracies": 0.5, "rewards/chosen": -0.35806965827941895, "rewards/margins": -0.028867367655038834, "rewards/rejected": -0.3292022943496704, "step": 403 }, { "epoch": 1.106091718001369, "grad_norm": 3.400132417678833, "learning_rate": 9.446575342465754e-07, "log_odds_chosen": 0.2431289702653885, "log_odds_ratio": -0.761867344379425, "logits/chosen": -0.9089571237564087, "logits/rejected": -0.9022093415260315, "logps/chosen": -2.6694395542144775, "logps/rejected": -2.8399252891540527, "loss": 2.0121, "nll_loss": 1.9359242916107178, "rewards/accuracies": 0.75, "rewards/chosen": -0.26694396138191223, "rewards/margins": 0.017048554494976997, "rewards/rejected": -0.2839925289154053, "step": 404 }, { "epoch": 1.108829568788501, "grad_norm": 3.35349702835083, "learning_rate": 9.445205479452055e-07, "log_odds_chosen": -0.10321371257305145, "log_odds_ratio": -0.7937582731246948, "logits/chosen": -1.0313023328781128, "logits/rejected": -1.0275828838348389, "logps/chosen": -2.7321887016296387, "logps/rejected": -2.6302835941314697, "loss": 1.9893, "nll_loss": 1.9098782539367676, "rewards/accuracies": 0.5, "rewards/chosen": -0.27321887016296387, "rewards/margins": -0.010190514847636223, "rewards/rejected": -0.2630283534526825, "step": 405 }, { "epoch": 1.111567419575633, "grad_norm": 3.378618001937866, "learning_rate": 9.443835616438356e-07, "log_odds_chosen": 0.6368343234062195, "log_odds_ratio": -0.8549987077713013, "logits/chosen": -0.914921224117279, "logits/rejected": -0.974553108215332, "logps/chosen": -2.617213726043701, "logps/rejected": -3.224368095397949, "loss": 1.8938, "nll_loss": 1.8082597255706787, "rewards/accuracies": 0.5, "rewards/chosen": -0.2617213726043701, "rewards/margins": 0.060715433210134506, "rewards/rejected": -0.3224368095397949, "step": 406 }, { "epoch": 1.1143052703627652, "grad_norm": 4.465366363525391, "learning_rate": 9.442465753424657e-07, "log_odds_chosen": -0.5570254325866699, "log_odds_ratio": -1.1324557065963745, "logits/chosen": -0.9550021886825562, "logits/rejected": -0.9301046133041382, "logps/chosen": -3.384762763977051, "logps/rejected": -2.82507061958313, "loss": 2.0634, "nll_loss": 1.950115442276001, "rewards/accuracies": 0.375, "rewards/chosen": -0.3384763300418854, "rewards/margins": -0.0559692345559597, "rewards/rejected": -0.282507061958313, "step": 407 }, { "epoch": 1.1170431211498972, "grad_norm": 4.004459857940674, "learning_rate": 9.441095890410959e-07, "log_odds_chosen": -0.5382084250450134, "log_odds_ratio": -1.0642797946929932, "logits/chosen": -1.0503575801849365, "logits/rejected": -1.0627429485321045, "logps/chosen": -3.300772190093994, "logps/rejected": -2.767669200897217, "loss": 2.0799, "nll_loss": 1.9735125303268433, "rewards/accuracies": 0.375, "rewards/chosen": -0.330077201128006, "rewards/margins": -0.05331026762723923, "rewards/rejected": -0.27676695585250854, "step": 408 }, { "epoch": 1.1197809719370295, "grad_norm": 4.703543186187744, "learning_rate": 9.43972602739726e-07, "log_odds_chosen": -1.3612135648727417, "log_odds_ratio": -1.6535627841949463, "logits/chosen": -1.0130244493484497, "logits/rejected": -0.9531778693199158, "logps/chosen": -4.376816749572754, "logps/rejected": -3.05259370803833, "loss": 2.2254, "nll_loss": 2.0600779056549072, "rewards/accuracies": 0.0, "rewards/chosen": -0.4376817047595978, "rewards/margins": -0.1324223428964615, "rewards/rejected": -0.3052593469619751, "step": 409 }, { "epoch": 1.1225188227241616, "grad_norm": 3.4288830757141113, "learning_rate": 9.438356164383561e-07, "log_odds_chosen": -0.2875592112541199, "log_odds_ratio": -0.9564872980117798, "logits/chosen": -0.8053601980209351, "logits/rejected": -0.8102849125862122, "logps/chosen": -2.67937970161438, "logps/rejected": -2.358147144317627, "loss": 1.9597, "nll_loss": 1.8640620708465576, "rewards/accuracies": 0.375, "rewards/chosen": -0.26793795824050903, "rewards/margins": -0.032123252749443054, "rewards/rejected": -0.23581470549106598, "step": 410 }, { "epoch": 1.1252566735112937, "grad_norm": 4.100811958312988, "learning_rate": 9.436986301369863e-07, "log_odds_chosen": -0.25316882133483887, "log_odds_ratio": -1.0723063945770264, "logits/chosen": -1.0206959247589111, "logits/rejected": -1.0309739112854004, "logps/chosen": -2.8976755142211914, "logps/rejected": -2.6614432334899902, "loss": 2.0174, "nll_loss": 1.9101234674453735, "rewards/accuracies": 0.375, "rewards/chosen": -0.2897675633430481, "rewards/margins": -0.023623252287507057, "rewards/rejected": -0.2661443054676056, "step": 411 }, { "epoch": 1.1279945242984257, "grad_norm": 3.5318777561187744, "learning_rate": 9.435616438356165e-07, "log_odds_chosen": -0.2753886580467224, "log_odds_ratio": -1.0155128240585327, "logits/chosen": -0.9856488108634949, "logits/rejected": -0.9806272983551025, "logps/chosen": -2.7930715084075928, "logps/rejected": -2.5120272636413574, "loss": 2.0126, "nll_loss": 1.9110406637191772, "rewards/accuracies": 0.5, "rewards/chosen": -0.27930715680122375, "rewards/margins": -0.028104446828365326, "rewards/rejected": -0.2512027323246002, "step": 412 }, { "epoch": 1.1307323750855578, "grad_norm": 3.696242570877075, "learning_rate": 9.434246575342465e-07, "log_odds_chosen": -0.0843755453824997, "log_odds_ratio": -0.8708043098449707, "logits/chosen": -1.021695852279663, "logits/rejected": -1.0111052989959717, "logps/chosen": -2.4728970527648926, "logps/rejected": -2.366790533065796, "loss": 1.9644, "nll_loss": 1.8773245811462402, "rewards/accuracies": 0.625, "rewards/chosen": -0.2472897171974182, "rewards/margins": -0.010610664263367653, "rewards/rejected": -0.2366790473461151, "step": 413 }, { "epoch": 1.1334702258726899, "grad_norm": 3.69380259513855, "learning_rate": 9.432876712328767e-07, "log_odds_chosen": -0.17924977838993073, "log_odds_ratio": -0.966170072555542, "logits/chosen": -0.9279091358184814, "logits/rejected": -0.907855749130249, "logps/chosen": -3.1341466903686523, "logps/rejected": -2.9547812938690186, "loss": 1.9734, "nll_loss": 1.8767704963684082, "rewards/accuracies": 0.375, "rewards/chosen": -0.31341469287872314, "rewards/margins": -0.017936525866389275, "rewards/rejected": -0.29547813534736633, "step": 414 }, { "epoch": 1.136208076659822, "grad_norm": 3.8510241508483887, "learning_rate": 9.431506849315069e-07, "log_odds_chosen": -0.23279675841331482, "log_odds_ratio": -1.0279197692871094, "logits/chosen": -0.9476776719093323, "logits/rejected": -0.937512218952179, "logps/chosen": -2.767728805541992, "logps/rejected": -2.509526252746582, "loss": 2.0378, "nll_loss": 1.9350097179412842, "rewards/accuracies": 0.375, "rewards/chosen": -0.2767728865146637, "rewards/margins": -0.025820281356573105, "rewards/rejected": -0.2509526014328003, "step": 415 }, { "epoch": 1.1389459274469542, "grad_norm": 4.013479232788086, "learning_rate": 9.430136986301369e-07, "log_odds_chosen": -0.49469029903411865, "log_odds_ratio": -1.124879240989685, "logits/chosen": -1.0207630395889282, "logits/rejected": -0.9615397453308105, "logps/chosen": -3.2416038513183594, "logps/rejected": -2.7177300453186035, "loss": 2.0243, "nll_loss": 1.9118616580963135, "rewards/accuracies": 0.375, "rewards/chosen": -0.3241603672504425, "rewards/margins": -0.052387360483407974, "rewards/rejected": -0.27177301049232483, "step": 416 }, { "epoch": 1.1416837782340863, "grad_norm": 3.704580545425415, "learning_rate": 9.428767123287671e-07, "log_odds_chosen": -0.29940667748451233, "log_odds_ratio": -0.9559765458106995, "logits/chosen": -0.9133734703063965, "logits/rejected": -0.9085409641265869, "logps/chosen": -3.1197636127471924, "logps/rejected": -2.8332157135009766, "loss": 2.1387, "nll_loss": 2.043121099472046, "rewards/accuracies": 0.5, "rewards/chosen": -0.3119763731956482, "rewards/margins": -0.028654808178544044, "rewards/rejected": -0.2833215594291687, "step": 417 }, { "epoch": 1.1444216290212184, "grad_norm": 3.8278086185455322, "learning_rate": 9.427397260273973e-07, "log_odds_chosen": 0.047882720828056335, "log_odds_ratio": -0.7324434518814087, "logits/chosen": -0.9280033111572266, "logits/rejected": -0.9767164587974548, "logps/chosen": -2.8972089290618896, "logps/rejected": -2.947025775909424, "loss": 2.0079, "nll_loss": 1.9346580505371094, "rewards/accuracies": 0.5, "rewards/chosen": -0.28972089290618896, "rewards/margins": 0.004981689155101776, "rewards/rejected": -0.29470258951187134, "step": 418 }, { "epoch": 1.1471594798083504, "grad_norm": 4.3392510414123535, "learning_rate": 9.426027397260274e-07, "log_odds_chosen": -0.14503028988838196, "log_odds_ratio": -0.8916075229644775, "logits/chosen": -0.9110120534896851, "logits/rejected": -0.9167876839637756, "logps/chosen": -3.080423355102539, "logps/rejected": -2.8894708156585693, "loss": 1.9679, "nll_loss": 1.8787211179733276, "rewards/accuracies": 0.625, "rewards/chosen": -0.30804234743118286, "rewards/margins": -0.019095247611403465, "rewards/rejected": -0.28894710540771484, "step": 419 }, { "epoch": 1.1498973305954825, "grad_norm": 3.5483367443084717, "learning_rate": 9.424657534246575e-07, "log_odds_chosen": 0.5449702739715576, "log_odds_ratio": -0.6755843162536621, "logits/chosen": -0.9382963180541992, "logits/rejected": -0.925865888595581, "logps/chosen": -2.6839797496795654, "logps/rejected": -3.1521964073181152, "loss": 1.8821, "nll_loss": 1.8145211935043335, "rewards/accuracies": 0.375, "rewards/chosen": -0.2683979570865631, "rewards/margins": 0.04682168364524841, "rewards/rejected": -0.3152196407318115, "step": 420 }, { "epoch": 1.1526351813826146, "grad_norm": 4.00389289855957, "learning_rate": 9.423287671232877e-07, "log_odds_chosen": -0.27942967414855957, "log_odds_ratio": -1.0490106344223022, "logits/chosen": -1.0617475509643555, "logits/rejected": -0.9998778104782104, "logps/chosen": -3.597550868988037, "logps/rejected": -3.3126754760742188, "loss": 2.1446, "nll_loss": 2.039722442626953, "rewards/accuracies": 0.375, "rewards/chosen": -0.3597550690174103, "rewards/margins": -0.028487496078014374, "rewards/rejected": -0.3312675654888153, "step": 421 }, { "epoch": 1.1553730321697468, "grad_norm": 3.434885025024414, "learning_rate": 9.421917808219178e-07, "log_odds_chosen": 0.04384416341781616, "log_odds_ratio": -0.8724478483200073, "logits/chosen": -0.9985008835792542, "logits/rejected": -1.0025405883789062, "logps/chosen": -2.7314364910125732, "logps/rejected": -2.7161264419555664, "loss": 1.929, "nll_loss": 1.8418045043945312, "rewards/accuracies": 0.625, "rewards/chosen": -0.2731436491012573, "rewards/margins": -0.001530987210571766, "rewards/rejected": -0.27161264419555664, "step": 422 }, { "epoch": 1.158110882956879, "grad_norm": 3.196267604827881, "learning_rate": 9.420547945205479e-07, "log_odds_chosen": 0.7471227645874023, "log_odds_ratio": -0.8472237586975098, "logits/chosen": -0.8803678750991821, "logits/rejected": -0.9453906416893005, "logps/chosen": -2.372014045715332, "logps/rejected": -3.0221776962280273, "loss": 1.9198, "nll_loss": 1.8350424766540527, "rewards/accuracies": 0.875, "rewards/chosen": -0.23720142245292664, "rewards/margins": 0.06501635909080505, "rewards/rejected": -0.3022177815437317, "step": 423 }, { "epoch": 1.160848733744011, "grad_norm": 3.6147186756134033, "learning_rate": 9.41917808219178e-07, "log_odds_chosen": 0.09304922819137573, "log_odds_ratio": -0.7383275032043457, "logits/chosen": -0.7753303050994873, "logits/rejected": -0.8015375137329102, "logps/chosen": -2.4312222003936768, "logps/rejected": -2.4710896015167236, "loss": 1.8836, "nll_loss": 1.8097915649414062, "rewards/accuracies": 0.625, "rewards/chosen": -0.24312222003936768, "rewards/margins": 0.003986736759543419, "rewards/rejected": -0.24710896611213684, "step": 424 }, { "epoch": 1.163586584531143, "grad_norm": 4.093167304992676, "learning_rate": 9.417808219178082e-07, "log_odds_chosen": 0.01580353081226349, "log_odds_ratio": -0.7684239149093628, "logits/chosen": -0.9857048988342285, "logits/rejected": -0.9676809310913086, "logps/chosen": -2.709198474884033, "logps/rejected": -2.697791814804077, "loss": 1.9901, "nll_loss": 1.9132843017578125, "rewards/accuracies": 0.625, "rewards/chosen": -0.2709198594093323, "rewards/margins": -0.001140686683356762, "rewards/rejected": -0.2697792053222656, "step": 425 }, { "epoch": 1.1663244353182751, "grad_norm": 4.253814220428467, "learning_rate": 9.416438356164384e-07, "log_odds_chosen": 0.07001325488090515, "log_odds_ratio": -1.2916748523712158, "logits/chosen": -0.9852561354637146, "logits/rejected": -0.9591344594955444, "logps/chosen": -4.045408725738525, "logps/rejected": -4.06275749206543, "loss": 2.0912, "nll_loss": 1.9619948863983154, "rewards/accuracies": 0.25, "rewards/chosen": -0.40454089641571045, "rewards/margins": 0.0017348937690258026, "rewards/rejected": -0.40627580881118774, "step": 426 }, { "epoch": 1.1690622861054072, "grad_norm": 3.7930121421813965, "learning_rate": 9.415068493150684e-07, "log_odds_chosen": -0.42020055651664734, "log_odds_ratio": -1.1947438716888428, "logits/chosen": -0.8730180859565735, "logits/rejected": -0.8667491674423218, "logps/chosen": -3.0750155448913574, "logps/rejected": -2.6531617641448975, "loss": 1.9775, "nll_loss": 1.858006477355957, "rewards/accuracies": 0.5, "rewards/chosen": -0.30750155448913574, "rewards/margins": -0.04218536987900734, "rewards/rejected": -0.2653161883354187, "step": 427 }, { "epoch": 1.1718001368925393, "grad_norm": 4.555682182312012, "learning_rate": 9.413698630136986e-07, "log_odds_chosen": -0.9890198707580566, "log_odds_ratio": -1.5421667098999023, "logits/chosen": -0.8786464929580688, "logits/rejected": -0.9151308536529541, "logps/chosen": -3.656768798828125, "logps/rejected": -2.6594696044921875, "loss": 2.0618, "nll_loss": 1.9075994491577148, "rewards/accuracies": 0.5, "rewards/chosen": -0.3656768798828125, "rewards/margins": -0.09972991049289703, "rewards/rejected": -0.2659469544887543, "step": 428 }, { "epoch": 1.1745379876796715, "grad_norm": 3.7204551696777344, "learning_rate": 9.412328767123288e-07, "log_odds_chosen": -0.07906913757324219, "log_odds_ratio": -0.9061985015869141, "logits/chosen": -0.9417262673377991, "logits/rejected": -0.9346026182174683, "logps/chosen": -2.60322904586792, "logps/rejected": -2.4797444343566895, "loss": 1.9387, "nll_loss": 1.8481018543243408, "rewards/accuracies": 0.625, "rewards/chosen": -0.2603229284286499, "rewards/margins": -0.01234847865998745, "rewards/rejected": -0.2479744404554367, "step": 429 }, { "epoch": 1.1772758384668036, "grad_norm": 4.178366661071777, "learning_rate": 9.410958904109588e-07, "log_odds_chosen": -0.20232906937599182, "log_odds_ratio": -0.9160199761390686, "logits/chosen": -0.8610104322433472, "logits/rejected": -0.8231719732284546, "logps/chosen": -3.048724889755249, "logps/rejected": -2.8167619705200195, "loss": 2.0644, "nll_loss": 1.9727873802185059, "rewards/accuracies": 0.5, "rewards/chosen": -0.3048724830150604, "rewards/margins": -0.02319628931581974, "rewards/rejected": -0.2816762328147888, "step": 430 }, { "epoch": 1.1800136892539357, "grad_norm": 3.142167568206787, "learning_rate": 9.40958904109589e-07, "log_odds_chosen": 0.411476731300354, "log_odds_ratio": -0.5532785654067993, "logits/chosen": -0.9245427846908569, "logits/rejected": -0.9415215253829956, "logps/chosen": -2.0352070331573486, "logps/rejected": -2.366818904876709, "loss": 1.7882, "nll_loss": 1.7328975200653076, "rewards/accuracies": 0.75, "rewards/chosen": -0.20352068543434143, "rewards/margins": 0.03316120430827141, "rewards/rejected": -0.23668189346790314, "step": 431 }, { "epoch": 1.1827515400410678, "grad_norm": 3.64583683013916, "learning_rate": 9.408219178082192e-07, "log_odds_chosen": -0.6252285838127136, "log_odds_ratio": -1.4312784671783447, "logits/chosen": -0.9047532081604004, "logits/rejected": -0.9363365769386292, "logps/chosen": -3.3382985591888428, "logps/rejected": -2.674100637435913, "loss": 1.9974, "nll_loss": 1.8542473316192627, "rewards/accuracies": 0.5, "rewards/chosen": -0.3338298797607422, "rewards/margins": -0.06641978025436401, "rewards/rejected": -0.2674100697040558, "step": 432 }, { "epoch": 1.1854893908281998, "grad_norm": 3.293267250061035, "learning_rate": 9.406849315068493e-07, "log_odds_chosen": 0.6077538728713989, "log_odds_ratio": -0.9059352874755859, "logits/chosen": -0.8957065343856812, "logits/rejected": -1.0071465969085693, "logps/chosen": -2.348954916000366, "logps/rejected": -2.8767917156219482, "loss": 1.9267, "nll_loss": 1.8361293077468872, "rewards/accuracies": 0.625, "rewards/chosen": -0.2348954975605011, "rewards/margins": 0.052783671766519547, "rewards/rejected": -0.28767916560173035, "step": 433 }, { "epoch": 1.1882272416153319, "grad_norm": 3.1903650760650635, "learning_rate": 9.405479452054794e-07, "log_odds_chosen": -0.33944806456565857, "log_odds_ratio": -0.9398101568222046, "logits/chosen": -1.0007567405700684, "logits/rejected": -1.044884204864502, "logps/chosen": -2.4493002891540527, "logps/rejected": -2.1172163486480713, "loss": 1.9754, "nll_loss": 1.8814136981964111, "rewards/accuracies": 0.625, "rewards/chosen": -0.24493002891540527, "rewards/margins": -0.03320838138461113, "rewards/rejected": -0.21172164380550385, "step": 434 }, { "epoch": 1.1909650924024642, "grad_norm": 4.5604143142700195, "learning_rate": 9.404109589041096e-07, "log_odds_chosen": -1.0325909852981567, "log_odds_ratio": -1.3883757591247559, "logits/chosen": -0.96143639087677, "logits/rejected": -0.9225080013275146, "logps/chosen": -3.3507134914398193, "logps/rejected": -2.372377634048462, "loss": 1.995, "nll_loss": 1.8562041521072388, "rewards/accuracies": 0.125, "rewards/chosen": -0.3350713849067688, "rewards/margins": -0.09783359616994858, "rewards/rejected": -0.23723778128623962, "step": 435 }, { "epoch": 1.1937029431895962, "grad_norm": 3.291177272796631, "learning_rate": 9.402739726027397e-07, "log_odds_chosen": 0.486358642578125, "log_odds_ratio": -0.6172692775726318, "logits/chosen": -0.8644670248031616, "logits/rejected": -0.986993134021759, "logps/chosen": -3.2643346786499023, "logps/rejected": -3.6650266647338867, "loss": 1.9081, "nll_loss": 1.8464082479476929, "rewards/accuracies": 0.625, "rewards/chosen": -0.3264334797859192, "rewards/margins": 0.04006919637322426, "rewards/rejected": -0.36650267243385315, "step": 436 }, { "epoch": 1.1964407939767283, "grad_norm": 3.5780959129333496, "learning_rate": 9.401369863013698e-07, "log_odds_chosen": 0.22941386699676514, "log_odds_ratio": -0.917752742767334, "logits/chosen": -1.0158848762512207, "logits/rejected": -1.0017664432525635, "logps/chosen": -2.708893060684204, "logps/rejected": -2.8408989906311035, "loss": 1.9786, "nll_loss": 1.8868207931518555, "rewards/accuracies": 0.5, "rewards/chosen": -0.2708893418312073, "rewards/margins": 0.01320059597492218, "rewards/rejected": -0.28408992290496826, "step": 437 }, { "epoch": 1.1991786447638604, "grad_norm": 3.716339588165283, "learning_rate": 9.399999999999999e-07, "log_odds_chosen": -0.43964970111846924, "log_odds_ratio": -1.0519341230392456, "logits/chosen": -0.9971828460693359, "logits/rejected": -1.0668160915374756, "logps/chosen": -3.2957918643951416, "logps/rejected": -2.8317995071411133, "loss": 1.9684, "nll_loss": 1.8632265329360962, "rewards/accuracies": 0.5, "rewards/chosen": -0.3295791745185852, "rewards/margins": -0.04639922082424164, "rewards/rejected": -0.28317996859550476, "step": 438 }, { "epoch": 1.2019164955509924, "grad_norm": 2.792707920074463, "learning_rate": 9.398630136986301e-07, "log_odds_chosen": 0.9129316806793213, "log_odds_ratio": -0.7532880902290344, "logits/chosen": -0.7196394801139832, "logits/rejected": -0.8258743286132812, "logps/chosen": -1.8724493980407715, "logps/rejected": -2.7148759365081787, "loss": 1.785, "nll_loss": 1.7096894979476929, "rewards/accuracies": 0.625, "rewards/chosen": -0.1872449517250061, "rewards/margins": 0.08424262702465057, "rewards/rejected": -0.27148759365081787, "step": 439 }, { "epoch": 1.2046543463381245, "grad_norm": 4.951113700866699, "learning_rate": 9.397260273972603e-07, "log_odds_chosen": -0.6142411231994629, "log_odds_ratio": -1.1505813598632812, "logits/chosen": -0.9217106103897095, "logits/rejected": -0.8399326801300049, "logps/chosen": -3.596770763397217, "logps/rejected": -3.0269904136657715, "loss": 2.0821, "nll_loss": 1.9670181274414062, "rewards/accuracies": 0.125, "rewards/chosen": -0.35967710614204407, "rewards/margins": -0.056978076696395874, "rewards/rejected": -0.3026990294456482, "step": 440 }, { "epoch": 1.2073921971252566, "grad_norm": 4.423344612121582, "learning_rate": 9.395890410958903e-07, "log_odds_chosen": -1.033639907836914, "log_odds_ratio": -1.8634461164474487, "logits/chosen": -0.888882040977478, "logits/rejected": -0.9150146245956421, "logps/chosen": -4.528703212738037, "logps/rejected": -3.479151725769043, "loss": 2.1047, "nll_loss": 1.9183945655822754, "rewards/accuracies": 0.375, "rewards/chosen": -0.45287030935287476, "rewards/margins": -0.10495512187480927, "rewards/rejected": -0.3479152023792267, "step": 441 }, { "epoch": 1.2101300479123887, "grad_norm": 4.114677906036377, "learning_rate": 9.394520547945205e-07, "log_odds_chosen": -0.5233542919158936, "log_odds_ratio": -1.0521697998046875, "logits/chosen": -0.9346010684967041, "logits/rejected": -0.9459203481674194, "logps/chosen": -3.34609055519104, "logps/rejected": -2.86643123626709, "loss": 1.9544, "nll_loss": 1.8492238521575928, "rewards/accuracies": 0.5, "rewards/chosen": -0.33460909128189087, "rewards/margins": -0.04796592891216278, "rewards/rejected": -0.2866431474685669, "step": 442 }, { "epoch": 1.212867898699521, "grad_norm": 3.6390013694763184, "learning_rate": 9.393150684931507e-07, "log_odds_chosen": -0.36086738109588623, "log_odds_ratio": -0.9481199979782104, "logits/chosen": -0.9989703893661499, "logits/rejected": -0.9794379472732544, "logps/chosen": -2.8020784854888916, "logps/rejected": -2.45806884765625, "loss": 1.9386, "nll_loss": 1.8438283205032349, "rewards/accuracies": 0.25, "rewards/chosen": -0.2802078425884247, "rewards/margins": -0.03440096974372864, "rewards/rejected": -0.24580687284469604, "step": 443 }, { "epoch": 1.215605749486653, "grad_norm": 3.869281530380249, "learning_rate": 9.391780821917807e-07, "log_odds_chosen": -0.47409093379974365, "log_odds_ratio": -1.3021128177642822, "logits/chosen": -0.7609426379203796, "logits/rejected": -0.7303049564361572, "logps/chosen": -3.011786460876465, "logps/rejected": -2.4787344932556152, "loss": 1.9172, "nll_loss": 1.7869799137115479, "rewards/accuracies": 0.625, "rewards/chosen": -0.30117863416671753, "rewards/margins": -0.053305208683013916, "rewards/rejected": -0.2478734403848648, "step": 444 }, { "epoch": 1.218343600273785, "grad_norm": 4.277957439422607, "learning_rate": 9.390410958904109e-07, "log_odds_chosen": -0.10094887018203735, "log_odds_ratio": -0.7960214614868164, "logits/chosen": -0.8531078696250916, "logits/rejected": -0.7828130722045898, "logps/chosen": -2.6124794483184814, "logps/rejected": -2.491469621658325, "loss": 1.914, "nll_loss": 1.834388256072998, "rewards/accuracies": 0.375, "rewards/chosen": -0.2612479329109192, "rewards/margins": -0.012100983411073685, "rewards/rejected": -0.249146968126297, "step": 445 }, { "epoch": 1.2210814510609171, "grad_norm": 4.131623268127441, "learning_rate": 9.389041095890411e-07, "log_odds_chosen": -0.12292099744081497, "log_odds_ratio": -0.8229446411132812, "logits/chosen": -0.8989929556846619, "logits/rejected": -0.9311378598213196, "logps/chosen": -2.519619941711426, "logps/rejected": -2.356677532196045, "loss": 1.9151, "nll_loss": 1.832849383354187, "rewards/accuracies": 0.5, "rewards/chosen": -0.25196200609207153, "rewards/margins": -0.01629423350095749, "rewards/rejected": -0.23566776514053345, "step": 446 }, { "epoch": 1.2238193018480492, "grad_norm": 3.0271055698394775, "learning_rate": 9.387671232876712e-07, "log_odds_chosen": 0.3429023325443268, "log_odds_ratio": -0.5701137185096741, "logits/chosen": -0.8242130279541016, "logits/rejected": -0.9134957194328308, "logps/chosen": -2.006155014038086, "logps/rejected": -2.292323589324951, "loss": 1.8052, "nll_loss": 1.7481498718261719, "rewards/accuracies": 0.625, "rewards/chosen": -0.20061549544334412, "rewards/margins": 0.02861684560775757, "rewards/rejected": -0.22923234105110168, "step": 447 }, { "epoch": 1.2265571526351815, "grad_norm": 3.2965919971466064, "learning_rate": 9.386301369863013e-07, "log_odds_chosen": 0.4205934405326843, "log_odds_ratio": -0.6258565187454224, "logits/chosen": -0.8108759522438049, "logits/rejected": -0.8366053104400635, "logps/chosen": -1.9639489650726318, "logps/rejected": -2.3286142349243164, "loss": 1.8461, "nll_loss": 1.7835168838500977, "rewards/accuracies": 0.75, "rewards/chosen": -0.1963948756456375, "rewards/margins": 0.036466531455516815, "rewards/rejected": -0.23286142945289612, "step": 448 }, { "epoch": 1.2292950034223136, "grad_norm": 4.023378372192383, "learning_rate": 9.384931506849315e-07, "log_odds_chosen": -0.051631033420562744, "log_odds_ratio": -0.8828964233398438, "logits/chosen": -0.8346832990646362, "logits/rejected": -0.8198440074920654, "logps/chosen": -2.962283134460449, "logps/rejected": -2.857346534729004, "loss": 1.9414, "nll_loss": 1.8530879020690918, "rewards/accuracies": 0.5, "rewards/chosen": -0.2962283492088318, "rewards/margins": -0.010493665933609009, "rewards/rejected": -0.2857346534729004, "step": 449 }, { "epoch": 1.2320328542094456, "grad_norm": 3.451204776763916, "learning_rate": 9.383561643835616e-07, "log_odds_chosen": 0.0559757798910141, "log_odds_ratio": -0.7496936321258545, "logits/chosen": -1.041049599647522, "logits/rejected": -1.125279426574707, "logps/chosen": -2.6752710342407227, "logps/rejected": -2.741370677947998, "loss": 1.8806, "nll_loss": 1.8056230545043945, "rewards/accuracies": 0.625, "rewards/chosen": -0.26752710342407227, "rewards/margins": 0.0066099632531404495, "rewards/rejected": -0.27413707971572876, "step": 450 }, { "epoch": 1.2347707049965777, "grad_norm": 4.840454578399658, "learning_rate": 9.382191780821917e-07, "log_odds_chosen": -0.6678401231765747, "log_odds_ratio": -1.3637971878051758, "logits/chosen": -0.8432908058166504, "logits/rejected": -0.8486170768737793, "logps/chosen": -3.7268595695495605, "logps/rejected": -3.0515055656433105, "loss": 1.9855, "nll_loss": 1.8491226434707642, "rewards/accuracies": 0.5, "rewards/chosen": -0.372685968875885, "rewards/margins": -0.0675354078412056, "rewards/rejected": -0.30515056848526, "step": 451 }, { "epoch": 1.2375085557837098, "grad_norm": 3.6299166679382324, "learning_rate": 9.380821917808219e-07, "log_odds_chosen": 0.14051735401153564, "log_odds_ratio": -0.7553268074989319, "logits/chosen": -0.8386212587356567, "logits/rejected": -0.9459216594696045, "logps/chosen": -2.3939390182495117, "logps/rejected": -2.482023239135742, "loss": 1.8637, "nll_loss": 1.788189172744751, "rewards/accuracies": 0.5, "rewards/chosen": -0.2393939048051834, "rewards/margins": 0.008808407932519913, "rewards/rejected": -0.24820229411125183, "step": 452 }, { "epoch": 1.2402464065708418, "grad_norm": 3.9476466178894043, "learning_rate": 9.37945205479452e-07, "log_odds_chosen": 0.13059863448143005, "log_odds_ratio": -0.8757564425468445, "logits/chosen": -0.8393001556396484, "logits/rejected": -0.9153112769126892, "logps/chosen": -2.716902732849121, "logps/rejected": -2.799515962600708, "loss": 1.8859, "nll_loss": 1.798301100730896, "rewards/accuracies": 0.625, "rewards/chosen": -0.2716902494430542, "rewards/margins": 0.008261313661932945, "rewards/rejected": -0.2799515724182129, "step": 453 }, { "epoch": 1.242984257357974, "grad_norm": 3.5365567207336426, "learning_rate": 9.378082191780822e-07, "log_odds_chosen": -0.2663125991821289, "log_odds_ratio": -0.8829459547996521, "logits/chosen": -1.0979920625686646, "logits/rejected": -1.1250625848770142, "logps/chosen": -2.594543933868408, "logps/rejected": -2.3370606899261475, "loss": 1.8882, "nll_loss": 1.7998689413070679, "rewards/accuracies": 0.5, "rewards/chosen": -0.2594544291496277, "rewards/margins": -0.025748347863554955, "rewards/rejected": -0.23370607197284698, "step": 454 }, { "epoch": 1.245722108145106, "grad_norm": 4.219146251678467, "learning_rate": 9.376712328767122e-07, "log_odds_chosen": -0.1289864331483841, "log_odds_ratio": -0.9016584157943726, "logits/chosen": -0.9573172330856323, "logits/rejected": -0.9958907961845398, "logps/chosen": -3.297942876815796, "logps/rejected": -3.1206774711608887, "loss": 1.963, "nll_loss": 1.87287437915802, "rewards/accuracies": 0.5, "rewards/chosen": -0.3297942876815796, "rewards/margins": -0.01772654987871647, "rewards/rejected": -0.31206774711608887, "step": 455 }, { "epoch": 1.2484599589322383, "grad_norm": 4.11430549621582, "learning_rate": 9.375342465753424e-07, "log_odds_chosen": -0.013968519866466522, "log_odds_ratio": -0.8793330192565918, "logits/chosen": -0.9525472521781921, "logits/rejected": -0.938325047492981, "logps/chosen": -2.4750423431396484, "logps/rejected": -2.4419074058532715, "loss": 1.9453, "nll_loss": 1.857388973236084, "rewards/accuracies": 0.375, "rewards/chosen": -0.24750421941280365, "rewards/margins": -0.003313470631837845, "rewards/rejected": -0.2441907525062561, "step": 456 }, { "epoch": 1.2511978097193703, "grad_norm": 4.087896347045898, "learning_rate": 9.373972602739726e-07, "log_odds_chosen": -0.04398024082183838, "log_odds_ratio": -0.9840128421783447, "logits/chosen": -0.9322932958602905, "logits/rejected": -0.9222823977470398, "logps/chosen": -3.382859230041504, "logps/rejected": -3.319317579269409, "loss": 1.9139, "nll_loss": 1.8154919147491455, "rewards/accuracies": 0.625, "rewards/chosen": -0.33828598260879517, "rewards/margins": -0.006354197859764099, "rewards/rejected": -0.3319317698478699, "step": 457 }, { "epoch": 1.2539356605065024, "grad_norm": 4.194822788238525, "learning_rate": 9.372602739726026e-07, "log_odds_chosen": -0.10395226627588272, "log_odds_ratio": -0.7929863333702087, "logits/chosen": -0.9619438648223877, "logits/rejected": -0.9529032707214355, "logps/chosen": -2.716660499572754, "logps/rejected": -2.6032822132110596, "loss": 1.8688, "nll_loss": 1.7894952297210693, "rewards/accuracies": 0.5, "rewards/chosen": -0.2716660499572754, "rewards/margins": -0.011337826028466225, "rewards/rejected": -0.2603282332420349, "step": 458 }, { "epoch": 1.2566735112936345, "grad_norm": 3.9570116996765137, "learning_rate": 9.371232876712328e-07, "log_odds_chosen": 0.03865058720111847, "log_odds_ratio": -0.7401283979415894, "logits/chosen": -0.989162802696228, "logits/rejected": -1.0158989429473877, "logps/chosen": -2.756401777267456, "logps/rejected": -2.764850378036499, "loss": 1.8601, "nll_loss": 1.7860959768295288, "rewards/accuracies": 0.5, "rewards/chosen": -0.27564018964767456, "rewards/margins": 0.0008448511362075806, "rewards/rejected": -0.27648505568504333, "step": 459 }, { "epoch": 1.2594113620807665, "grad_norm": 3.305462598800659, "learning_rate": 9.36986301369863e-07, "log_odds_chosen": 0.7927530407905579, "log_odds_ratio": -0.5428289771080017, "logits/chosen": -0.826488196849823, "logits/rejected": -1.0005433559417725, "logps/chosen": -2.092273712158203, "logps/rejected": -2.788909912109375, "loss": 1.8137, "nll_loss": 1.7593830823898315, "rewards/accuracies": 0.75, "rewards/chosen": -0.20922738313674927, "rewards/margins": 0.0696636289358139, "rewards/rejected": -0.278890997171402, "step": 460 }, { "epoch": 1.2621492128678988, "grad_norm": 3.2129340171813965, "learning_rate": 9.36849315068493e-07, "log_odds_chosen": 0.20451851189136505, "log_odds_ratio": -0.645418643951416, "logits/chosen": -0.8422325253486633, "logits/rejected": -0.8839700222015381, "logps/chosen": -2.0130233764648438, "logps/rejected": -2.197845935821533, "loss": 1.7491, "nll_loss": 1.6845695972442627, "rewards/accuracies": 0.625, "rewards/chosen": -0.20130234956741333, "rewards/margins": 0.018482251092791557, "rewards/rejected": -0.21978461742401123, "step": 461 }, { "epoch": 1.264887063655031, "grad_norm": 3.828618288040161, "learning_rate": 9.367123287671232e-07, "log_odds_chosen": 0.5583740472793579, "log_odds_ratio": -0.6824285387992859, "logits/chosen": -1.0036537647247314, "logits/rejected": -1.0443466901779175, "logps/chosen": -2.6296098232269287, "logps/rejected": -3.1372907161712646, "loss": 1.8425, "nll_loss": 1.774209976196289, "rewards/accuracies": 0.625, "rewards/chosen": -0.2629609704017639, "rewards/margins": 0.050768084824085236, "rewards/rejected": -0.31372904777526855, "step": 462 }, { "epoch": 1.267624914442163, "grad_norm": 3.24383807182312, "learning_rate": 9.365753424657534e-07, "log_odds_chosen": 1.1106573343276978, "log_odds_ratio": -0.3722325265407562, "logits/chosen": -0.7885444760322571, "logits/rejected": -0.870980441570282, "logps/chosen": -1.84746253490448, "logps/rejected": -2.849438428878784, "loss": 1.7168, "nll_loss": 1.6795628070831299, "rewards/accuracies": 0.875, "rewards/chosen": -0.18474626541137695, "rewards/margins": 0.10019759833812714, "rewards/rejected": -0.2849438488483429, "step": 463 }, { "epoch": 1.270362765229295, "grad_norm": 3.450366735458374, "learning_rate": 9.364383561643835e-07, "log_odds_chosen": 1.3937112092971802, "log_odds_ratio": -0.5401990413665771, "logits/chosen": -0.902138352394104, "logits/rejected": -0.989130973815918, "logps/chosen": -2.2362871170043945, "logps/rejected": -3.557910919189453, "loss": 1.788, "nll_loss": 1.733980655670166, "rewards/accuracies": 0.75, "rewards/chosen": -0.22362872958183289, "rewards/margins": 0.13216234743595123, "rewards/rejected": -0.3557910919189453, "step": 464 }, { "epoch": 1.273100616016427, "grad_norm": 5.672948360443115, "learning_rate": 9.363013698630136e-07, "log_odds_chosen": -1.4469393491744995, "log_odds_ratio": -1.6929049491882324, "logits/chosen": -0.9120296239852905, "logits/rejected": -0.8185737729072571, "logps/chosen": -4.492516994476318, "logps/rejected": -3.0877795219421387, "loss": 2.0053, "nll_loss": 1.8360235691070557, "rewards/accuracies": 0.0, "rewards/chosen": -0.4492516815662384, "rewards/margins": -0.14047372341156006, "rewards/rejected": -0.30877795815467834, "step": 465 }, { "epoch": 1.2758384668035592, "grad_norm": 3.9943623542785645, "learning_rate": 9.361643835616438e-07, "log_odds_chosen": -0.44088006019592285, "log_odds_ratio": -1.1150579452514648, "logits/chosen": -0.913457989692688, "logits/rejected": -0.9585485458374023, "logps/chosen": -3.250300407409668, "logps/rejected": -2.7773051261901855, "loss": 1.8509, "nll_loss": 1.7394014596939087, "rewards/accuracies": 0.5, "rewards/chosen": -0.32503005862236023, "rewards/margins": -0.047299545258283615, "rewards/rejected": -0.2777305245399475, "step": 466 }, { "epoch": 1.2785763175906912, "grad_norm": 3.843128204345703, "learning_rate": 9.360273972602739e-07, "log_odds_chosen": -0.35470345616340637, "log_odds_ratio": -1.0251595973968506, "logits/chosen": -1.0255475044250488, "logits/rejected": -1.0182117223739624, "logps/chosen": -3.097752094268799, "logps/rejected": -2.737415313720703, "loss": 1.8236, "nll_loss": 1.7210397720336914, "rewards/accuracies": 0.375, "rewards/chosen": -0.3097752034664154, "rewards/margins": -0.036033689975738525, "rewards/rejected": -0.2737415134906769, "step": 467 }, { "epoch": 1.2813141683778233, "grad_norm": 5.2299580574035645, "learning_rate": 9.358904109589041e-07, "log_odds_chosen": -1.2145040035247803, "log_odds_ratio": -1.7046483755111694, "logits/chosen": -0.9026891589164734, "logits/rejected": -0.8774552345275879, "logps/chosen": -4.434922218322754, "logps/rejected": -3.2303154468536377, "loss": 1.9878, "nll_loss": 1.8172906637191772, "rewards/accuracies": 0.25, "rewards/chosen": -0.44349223375320435, "rewards/margins": -0.12046068906784058, "rewards/rejected": -0.32303154468536377, "step": 468 }, { "epoch": 1.2840520191649554, "grad_norm": 3.8782119750976562, "learning_rate": 9.357534246575341e-07, "log_odds_chosen": -0.16448958218097687, "log_odds_ratio": -0.9684576392173767, "logits/chosen": -0.9601395726203918, "logits/rejected": -0.9511026740074158, "logps/chosen": -2.855293035507202, "logps/rejected": -2.6701409816741943, "loss": 1.8706, "nll_loss": 1.7737096548080444, "rewards/accuracies": 0.375, "rewards/chosen": -0.28552931547164917, "rewards/margins": -0.01851522922515869, "rewards/rejected": -0.2670140862464905, "step": 469 }, { "epoch": 1.2867898699520877, "grad_norm": 3.5569701194763184, "learning_rate": 9.356164383561643e-07, "log_odds_chosen": 0.4481412172317505, "log_odds_ratio": -0.6911733746528625, "logits/chosen": -0.9821241497993469, "logits/rejected": -0.9667285680770874, "logps/chosen": -2.4121713638305664, "logps/rejected": -2.7840452194213867, "loss": 1.8461, "nll_loss": 1.7770285606384277, "rewards/accuracies": 0.5, "rewards/chosen": -0.24121716618537903, "rewards/margins": 0.03718734160065651, "rewards/rejected": -0.2784045338630676, "step": 470 }, { "epoch": 1.2895277207392197, "grad_norm": 4.657106399536133, "learning_rate": 9.354794520547945e-07, "log_odds_chosen": 0.42293301224708557, "log_odds_ratio": -0.7235844135284424, "logits/chosen": -0.9289930462837219, "logits/rejected": -0.9308421015739441, "logps/chosen": -3.0031371116638184, "logps/rejected": -3.3727474212646484, "loss": 1.8266, "nll_loss": 1.7542768716812134, "rewards/accuracies": 0.75, "rewards/chosen": -0.30031371116638184, "rewards/margins": 0.03696101903915405, "rewards/rejected": -0.3372747302055359, "step": 471 }, { "epoch": 1.2922655715263518, "grad_norm": 4.104803562164307, "learning_rate": 9.353424657534245e-07, "log_odds_chosen": -0.7181039452552795, "log_odds_ratio": -1.1457899808883667, "logits/chosen": -1.0138988494873047, "logits/rejected": -0.9983909130096436, "logps/chosen": -2.951328754425049, "logps/rejected": -2.273860454559326, "loss": 1.9253, "nll_loss": 1.810694932937622, "rewards/accuracies": 0.25, "rewards/chosen": -0.29513290524482727, "rewards/margins": -0.06774687021970749, "rewards/rejected": -0.22738602757453918, "step": 472 }, { "epoch": 1.2950034223134839, "grad_norm": 4.163974285125732, "learning_rate": 9.352054794520547e-07, "log_odds_chosen": -0.4811266362667084, "log_odds_ratio": -1.033942461013794, "logits/chosen": -0.9857533574104309, "logits/rejected": -0.9326542019844055, "logps/chosen": -2.966418504714966, "logps/rejected": -2.5165672302246094, "loss": 1.9078, "nll_loss": 1.8043951988220215, "rewards/accuracies": 0.25, "rewards/chosen": -0.29664185643196106, "rewards/margins": -0.04498513042926788, "rewards/rejected": -0.251656711101532, "step": 473 }, { "epoch": 1.2977412731006162, "grad_norm": 4.377518653869629, "learning_rate": 9.350684931506849e-07, "log_odds_chosen": -0.33002883195877075, "log_odds_ratio": -1.103631615638733, "logits/chosen": -0.9496587514877319, "logits/rejected": -0.9351399540901184, "logps/chosen": -3.0006470680236816, "logps/rejected": -2.694063425064087, "loss": 1.8604, "nll_loss": 1.7500700950622559, "rewards/accuracies": 0.25, "rewards/chosen": -0.30006471276283264, "rewards/margins": -0.03065834939479828, "rewards/rejected": -0.26940634846687317, "step": 474 }, { "epoch": 1.3004791238877482, "grad_norm": 3.665989637374878, "learning_rate": 9.349315068493149e-07, "log_odds_chosen": -0.6708067655563354, "log_odds_ratio": -1.202699065208435, "logits/chosen": -0.9102855920791626, "logits/rejected": -0.9520264267921448, "logps/chosen": -2.9461569786071777, "logps/rejected": -2.294254779815674, "loss": 1.8983, "nll_loss": 1.7780163288116455, "rewards/accuracies": 0.5, "rewards/chosen": -0.2946156859397888, "rewards/margins": -0.06519021838903427, "rewards/rejected": -0.22942546010017395, "step": 475 }, { "epoch": 1.3032169746748803, "grad_norm": 3.899888038635254, "learning_rate": 9.347945205479451e-07, "log_odds_chosen": -0.00018849968910217285, "log_odds_ratio": -0.8839226961135864, "logits/chosen": -0.9508797526359558, "logits/rejected": -0.9968738555908203, "logps/chosen": -2.4970192909240723, "logps/rejected": -2.4682984352111816, "loss": 1.8281, "nll_loss": 1.7397133111953735, "rewards/accuracies": 0.625, "rewards/chosen": -0.24970193207263947, "rewards/margins": -0.002872064709663391, "rewards/rejected": -0.24682986736297607, "step": 476 }, { "epoch": 1.3059548254620124, "grad_norm": 5.243633270263672, "learning_rate": 9.346575342465753e-07, "log_odds_chosen": -0.6070296764373779, "log_odds_ratio": -1.256559133529663, "logits/chosen": -0.9384996294975281, "logits/rejected": -0.9263999462127686, "logps/chosen": -3.3609349727630615, "logps/rejected": -2.7671313285827637, "loss": 1.8757, "nll_loss": 1.7500405311584473, "rewards/accuracies": 0.5, "rewards/chosen": -0.3360934853553772, "rewards/margins": -0.05938034504652023, "rewards/rejected": -0.27671313285827637, "step": 477 }, { "epoch": 1.3086926762491444, "grad_norm": 4.664058208465576, "learning_rate": 9.345205479452054e-07, "log_odds_chosen": -0.8643841743469238, "log_odds_ratio": -1.3338806629180908, "logits/chosen": -0.9030283093452454, "logits/rejected": -0.8812140226364136, "logps/chosen": -3.827991247177124, "logps/rejected": -2.9856667518615723, "loss": 1.945, "nll_loss": 1.8116410970687866, "rewards/accuracies": 0.375, "rewards/chosen": -0.3827991187572479, "rewards/margins": -0.08423246443271637, "rewards/rejected": -0.29856666922569275, "step": 478 }, { "epoch": 1.3114305270362765, "grad_norm": 4.7929487228393555, "learning_rate": 9.343835616438355e-07, "log_odds_chosen": -0.9201123714447021, "log_odds_ratio": -1.443932056427002, "logits/chosen": -0.8388571739196777, "logits/rejected": -0.831200122833252, "logps/chosen": -3.42616868019104, "logps/rejected": -2.5138773918151855, "loss": 1.9228, "nll_loss": 1.7784028053283691, "rewards/accuracies": 0.375, "rewards/chosen": -0.34261688590049744, "rewards/margins": -0.09122911840677261, "rewards/rejected": -0.2513877749443054, "step": 479 }, { "epoch": 1.3141683778234086, "grad_norm": 4.122684478759766, "learning_rate": 9.342465753424658e-07, "log_odds_chosen": 0.0025262534618377686, "log_odds_ratio": -0.9677324295043945, "logits/chosen": -0.9119702577590942, "logits/rejected": -0.8923804759979248, "logps/chosen": -2.8234105110168457, "logps/rejected": -2.722898483276367, "loss": 1.8231, "nll_loss": 1.726320743560791, "rewards/accuracies": 0.625, "rewards/chosen": -0.2823410630226135, "rewards/margins": -0.010051192715764046, "rewards/rejected": -0.27228987216949463, "step": 480 }, { "epoch": 1.3169062286105406, "grad_norm": 4.009152889251709, "learning_rate": 9.341095890410959e-07, "log_odds_chosen": -0.10119867324829102, "log_odds_ratio": -0.9432282447814941, "logits/chosen": -0.84409499168396, "logits/rejected": -0.8278098106384277, "logps/chosen": -2.969961166381836, "logps/rejected": -2.8399877548217773, "loss": 1.7986, "nll_loss": 1.704313039779663, "rewards/accuracies": 0.75, "rewards/chosen": -0.2969961166381836, "rewards/margins": -0.012997319921851158, "rewards/rejected": -0.2839987874031067, "step": 481 }, { "epoch": 1.3196440793976727, "grad_norm": 4.129917144775391, "learning_rate": 9.33972602739726e-07, "log_odds_chosen": -0.31976941227912903, "log_odds_ratio": -1.0784516334533691, "logits/chosen": -0.974651038646698, "logits/rejected": -0.9537992477416992, "logps/chosen": -2.7868990898132324, "logps/rejected": -2.5216150283813477, "loss": 1.8531, "nll_loss": 1.7452754974365234, "rewards/accuracies": 0.5, "rewards/chosen": -0.2786899209022522, "rewards/margins": -0.02652841806411743, "rewards/rejected": -0.25216150283813477, "step": 482 }, { "epoch": 1.322381930184805, "grad_norm": 4.167330265045166, "learning_rate": 9.338356164383562e-07, "log_odds_chosen": -0.7557210922241211, "log_odds_ratio": -1.416776180267334, "logits/chosen": -1.0126152038574219, "logits/rejected": -1.0544315576553345, "logps/chosen": -3.582162857055664, "logps/rejected": -2.812786340713501, "loss": 1.9205, "nll_loss": 1.7788300514221191, "rewards/accuracies": 0.5, "rewards/chosen": -0.3582163155078888, "rewards/margins": -0.07693766057491302, "rewards/rejected": -0.2812786400318146, "step": 483 }, { "epoch": 1.325119780971937, "grad_norm": 5.309804916381836, "learning_rate": 9.336986301369863e-07, "log_odds_chosen": -0.6642822027206421, "log_odds_ratio": -1.4951506853103638, "logits/chosen": -0.9043145179748535, "logits/rejected": -0.8645736575126648, "logps/chosen": -3.6071529388427734, "logps/rejected": -2.973343849182129, "loss": 1.9617, "nll_loss": 1.8122262954711914, "rewards/accuracies": 0.375, "rewards/chosen": -0.3607153296470642, "rewards/margins": -0.06338092684745789, "rewards/rejected": -0.2973344027996063, "step": 484 }, { "epoch": 1.3278576317590691, "grad_norm": 4.168484210968018, "learning_rate": 9.335616438356165e-07, "log_odds_chosen": -0.8874807357788086, "log_odds_ratio": -1.4269459247589111, "logits/chosen": -0.9776673316955566, "logits/rejected": -0.9319186210632324, "logps/chosen": -3.6457953453063965, "logps/rejected": -2.7626023292541504, "loss": 1.8812, "nll_loss": 1.7385201454162598, "rewards/accuracies": 0.25, "rewards/chosen": -0.36457955837249756, "rewards/margins": -0.0883193090558052, "rewards/rejected": -0.27626025676727295, "step": 485 }, { "epoch": 1.3305954825462012, "grad_norm": 4.816213607788086, "learning_rate": 9.334246575342465e-07, "log_odds_chosen": -0.5119718909263611, "log_odds_ratio": -1.106309413909912, "logits/chosen": -0.9125758409500122, "logits/rejected": -0.9058638215065002, "logps/chosen": -3.2770802974700928, "logps/rejected": -2.7975432872772217, "loss": 1.9032, "nll_loss": 1.7925944328308105, "rewards/accuracies": 0.375, "rewards/chosen": -0.32770800590515137, "rewards/margins": -0.04795368015766144, "rewards/rejected": -0.2797543406486511, "step": 486 }, { "epoch": 1.3333333333333333, "grad_norm": 3.3423373699188232, "learning_rate": 9.332876712328767e-07, "log_odds_chosen": -0.1101175993680954, "log_odds_ratio": -0.980596125125885, "logits/chosen": -0.8749518394470215, "logits/rejected": -0.9588333368301392, "logps/chosen": -2.423133611679077, "logps/rejected": -2.2837560176849365, "loss": 1.7954, "nll_loss": 1.6973803043365479, "rewards/accuracies": 0.625, "rewards/chosen": -0.24231335520744324, "rewards/margins": -0.013937756419181824, "rewards/rejected": -0.2283755987882614, "step": 487 }, { "epoch": 1.3360711841204655, "grad_norm": 3.9788286685943604, "learning_rate": 9.331506849315069e-07, "log_odds_chosen": -0.8466473817825317, "log_odds_ratio": -1.4142218828201294, "logits/chosen": -0.9817092418670654, "logits/rejected": -0.9618313908576965, "logps/chosen": -3.2813422679901123, "logps/rejected": -2.4468250274658203, "loss": 1.8992, "nll_loss": 1.7578045129776, "rewards/accuracies": 0.375, "rewards/chosen": -0.3281342089176178, "rewards/margins": -0.08345171064138412, "rewards/rejected": -0.24468249082565308, "step": 488 }, { "epoch": 1.3388090349075976, "grad_norm": 4.6851630210876465, "learning_rate": 9.330136986301369e-07, "log_odds_chosen": -0.9296008944511414, "log_odds_ratio": -1.6515969038009644, "logits/chosen": -0.8864119052886963, "logits/rejected": -0.9020967483520508, "logps/chosen": -4.083072662353516, "logps/rejected": -3.1245856285095215, "loss": 1.8846, "nll_loss": 1.7194570302963257, "rewards/accuracies": 0.375, "rewards/chosen": -0.4083072543144226, "rewards/margins": -0.0958486944437027, "rewards/rejected": -0.3124585747718811, "step": 489 }, { "epoch": 1.3415468856947297, "grad_norm": 4.296204090118408, "learning_rate": 9.328767123287671e-07, "log_odds_chosen": -0.928188145160675, "log_odds_ratio": -1.3579517602920532, "logits/chosen": -0.9588658809661865, "logits/rejected": -0.9263182282447815, "logps/chosen": -3.3630423545837402, "logps/rejected": -2.473022699356079, "loss": 1.8424, "nll_loss": 1.7066138982772827, "rewards/accuracies": 0.25, "rewards/chosen": -0.336304247379303, "rewards/margins": -0.08900194615125656, "rewards/rejected": -0.24730229377746582, "step": 490 }, { "epoch": 1.3442847364818618, "grad_norm": 3.802360773086548, "learning_rate": 9.327397260273973e-07, "log_odds_chosen": -0.7202585935592651, "log_odds_ratio": -1.4994592666625977, "logits/chosen": -0.8375867605209351, "logits/rejected": -0.8006612062454224, "logps/chosen": -3.1487042903900146, "logps/rejected": -2.465179204940796, "loss": 1.8862, "nll_loss": 1.7362195253372192, "rewards/accuracies": 0.5, "rewards/chosen": -0.3148704171180725, "rewards/margins": -0.06835250556468964, "rewards/rejected": -0.24651792645454407, "step": 491 }, { "epoch": 1.3470225872689938, "grad_norm": 3.8992276191711426, "learning_rate": 9.326027397260274e-07, "log_odds_chosen": -0.12883880734443665, "log_odds_ratio": -1.1109716892242432, "logits/chosen": -0.9058643579483032, "logits/rejected": -0.9434406161308289, "logps/chosen": -2.905909776687622, "logps/rejected": -2.7823636531829834, "loss": 1.853, "nll_loss": 1.7418930530548096, "rewards/accuracies": 0.5, "rewards/chosen": -0.2905910015106201, "rewards/margins": -0.01235463097691536, "rewards/rejected": -0.27823638916015625, "step": 492 }, { "epoch": 1.3497604380561259, "grad_norm": 4.5601348876953125, "learning_rate": 9.324657534246575e-07, "log_odds_chosen": 0.16130797564983368, "log_odds_ratio": -0.9541504979133606, "logits/chosen": -0.9482983350753784, "logits/rejected": -0.8914971947669983, "logps/chosen": -3.564263105392456, "logps/rejected": -3.7504682540893555, "loss": 1.8463, "nll_loss": 1.7509090900421143, "rewards/accuracies": 0.375, "rewards/chosen": -0.35642629861831665, "rewards/margins": 0.018620522692799568, "rewards/rejected": -0.37504684925079346, "step": 493 }, { "epoch": 1.352498288843258, "grad_norm": 4.297572612762451, "learning_rate": 9.323287671232877e-07, "log_odds_chosen": 0.26670771837234497, "log_odds_ratio": -0.6575164794921875, "logits/chosen": -0.7711876630783081, "logits/rejected": -0.7836054563522339, "logps/chosen": -3.1624794006347656, "logps/rejected": -3.385185718536377, "loss": 1.8086, "nll_loss": 1.7428922653198242, "rewards/accuracies": 0.75, "rewards/chosen": -0.31624794006347656, "rewards/margins": 0.022270645946264267, "rewards/rejected": -0.33851855993270874, "step": 494 }, { "epoch": 1.35523613963039, "grad_norm": 4.491158485412598, "learning_rate": 9.321917808219178e-07, "log_odds_chosen": -0.47266411781311035, "log_odds_ratio": -1.1174730062484741, "logits/chosen": -0.8109104633331299, "logits/rejected": -0.8050130009651184, "logps/chosen": -3.399637222290039, "logps/rejected": -2.893627405166626, "loss": 1.8487, "nll_loss": 1.7369630336761475, "rewards/accuracies": 0.375, "rewards/chosen": -0.3399637043476105, "rewards/margins": -0.05060095712542534, "rewards/rejected": -0.28936275839805603, "step": 495 }, { "epoch": 1.3579739904175223, "grad_norm": 3.872058153152466, "learning_rate": 9.320547945205479e-07, "log_odds_chosen": 0.19777384400367737, "log_odds_ratio": -0.7054193019866943, "logits/chosen": -0.811224102973938, "logits/rejected": -0.8113490343093872, "logps/chosen": -2.598439931869507, "logps/rejected": -2.7736520767211914, "loss": 1.7688, "nll_loss": 1.6982803344726562, "rewards/accuracies": 0.625, "rewards/chosen": -0.25984400510787964, "rewards/margins": 0.017521224915981293, "rewards/rejected": -0.27736520767211914, "step": 496 }, { "epoch": 1.3607118412046544, "grad_norm": 3.2397823333740234, "learning_rate": 9.319178082191781e-07, "log_odds_chosen": 0.06663939356803894, "log_odds_ratio": -0.7145644426345825, "logits/chosen": -0.813372015953064, "logits/rejected": -0.9123958945274353, "logps/chosen": -2.2804064750671387, "logps/rejected": -2.311699151992798, "loss": 1.7, "nll_loss": 1.6285593509674072, "rewards/accuracies": 0.5, "rewards/chosen": -0.2280406504869461, "rewards/margins": 0.003129260614514351, "rewards/rejected": -0.2311698943376541, "step": 497 }, { "epoch": 1.3634496919917864, "grad_norm": 3.2795073986053467, "learning_rate": 9.317808219178082e-07, "log_odds_chosen": 0.5106921792030334, "log_odds_ratio": -0.6711671352386475, "logits/chosen": -0.7641639709472656, "logits/rejected": -0.8423398733139038, "logps/chosen": -2.1438257694244385, "logps/rejected": -2.5345299243927, "loss": 1.7589, "nll_loss": 1.6917871236801147, "rewards/accuracies": 0.75, "rewards/chosen": -0.2143825888633728, "rewards/margins": 0.039070408791303635, "rewards/rejected": -0.25345298647880554, "step": 498 }, { "epoch": 1.3661875427789185, "grad_norm": 4.264488220214844, "learning_rate": 9.316438356164384e-07, "log_odds_chosen": 0.17356616258621216, "log_odds_ratio": -0.913862943649292, "logits/chosen": -0.8285142779350281, "logits/rejected": -0.8525061011314392, "logps/chosen": -2.844082832336426, "logps/rejected": -2.997018337249756, "loss": 1.8284, "nll_loss": 1.7370057106018066, "rewards/accuracies": 0.5, "rewards/chosen": -0.284408301115036, "rewards/margins": 0.015293557196855545, "rewards/rejected": -0.29970186948776245, "step": 499 }, { "epoch": 1.3689253935660506, "grad_norm": 3.365171194076538, "learning_rate": 9.315068493150684e-07, "log_odds_chosen": 0.276271790266037, "log_odds_ratio": -0.6131040453910828, "logits/chosen": -0.8544321060180664, "logits/rejected": -0.984613299369812, "logps/chosen": -2.7310781478881836, "logps/rejected": -2.9652605056762695, "loss": 1.7256, "nll_loss": 1.6643226146697998, "rewards/accuracies": 0.625, "rewards/chosen": -0.2731077969074249, "rewards/margins": 0.023418249562382698, "rewards/rejected": -0.29652607440948486, "step": 500 }, { "epoch": 1.3716632443531829, "grad_norm": 4.088944911956787, "learning_rate": 9.313698630136986e-07, "log_odds_chosen": -0.14257746934890747, "log_odds_ratio": -0.906416654586792, "logits/chosen": -0.8226394653320312, "logits/rejected": -0.8312472105026245, "logps/chosen": -3.2517340183258057, "logps/rejected": -3.0876307487487793, "loss": 1.9027, "nll_loss": 1.812063455581665, "rewards/accuracies": 0.5, "rewards/chosen": -0.32517337799072266, "rewards/margins": -0.016410307958722115, "rewards/rejected": -0.3087630867958069, "step": 501 }, { "epoch": 1.374401095140315, "grad_norm": 4.223456859588623, "learning_rate": 9.312328767123288e-07, "log_odds_chosen": -0.08460256457328796, "log_odds_ratio": -1.0239485502243042, "logits/chosen": -0.9213459491729736, "logits/rejected": -0.8795337080955505, "logps/chosen": -3.0923590660095215, "logps/rejected": -3.0129661560058594, "loss": 1.7904, "nll_loss": 1.6879713535308838, "rewards/accuracies": 0.25, "rewards/chosen": -0.30923593044281006, "rewards/margins": -0.00793931633234024, "rewards/rejected": -0.3012966215610504, "step": 502 }, { "epoch": 1.377138945927447, "grad_norm": 3.2422423362731934, "learning_rate": 9.310958904109588e-07, "log_odds_chosen": 0.36045488715171814, "log_odds_ratio": -0.5928750038146973, "logits/chosen": -0.9260972738265991, "logits/rejected": -0.9567686319351196, "logps/chosen": -2.0466361045837402, "logps/rejected": -2.344168186187744, "loss": 1.7053, "nll_loss": 1.6460541486740112, "rewards/accuracies": 0.625, "rewards/chosen": -0.20466363430023193, "rewards/margins": 0.029753180220723152, "rewards/rejected": -0.23441681265830994, "step": 503 }, { "epoch": 1.379876796714579, "grad_norm": 4.032665729522705, "learning_rate": 9.30958904109589e-07, "log_odds_chosen": 0.2088005244731903, "log_odds_ratio": -0.6793091297149658, "logits/chosen": -0.7832350730895996, "logits/rejected": -0.8058417439460754, "logps/chosen": -2.817995071411133, "logps/rejected": -2.96466064453125, "loss": 1.7359, "nll_loss": 1.6680115461349487, "rewards/accuracies": 0.625, "rewards/chosen": -0.2817994952201843, "rewards/margins": 0.014666564762592316, "rewards/rejected": -0.29646605253219604, "step": 504 }, { "epoch": 1.3826146475017111, "grad_norm": 4.0158233642578125, "learning_rate": 9.308219178082192e-07, "log_odds_chosen": -0.13934308290481567, "log_odds_ratio": -0.8645936250686646, "logits/chosen": -0.8011181354522705, "logits/rejected": -0.7880538105964661, "logps/chosen": -2.890002727508545, "logps/rejected": -2.731372356414795, "loss": 1.7761, "nll_loss": 1.6896281242370605, "rewards/accuracies": 0.375, "rewards/chosen": -0.2890002727508545, "rewards/margins": -0.015863025560975075, "rewards/rejected": -0.27313724160194397, "step": 505 }, { "epoch": 1.3853524982888432, "grad_norm": 4.287626266479492, "learning_rate": 9.306849315068493e-07, "log_odds_chosen": -0.21418534219264984, "log_odds_ratio": -0.8539078235626221, "logits/chosen": -0.8565189838409424, "logits/rejected": -0.8194257616996765, "logps/chosen": -2.6321752071380615, "logps/rejected": -2.4200968742370605, "loss": 1.7147, "nll_loss": 1.629295825958252, "rewards/accuracies": 0.375, "rewards/chosen": -0.2632175087928772, "rewards/margins": -0.021207816898822784, "rewards/rejected": -0.2420097142457962, "step": 506 }, { "epoch": 1.3880903490759753, "grad_norm": 3.2381064891815186, "learning_rate": 9.305479452054794e-07, "log_odds_chosen": 0.020237930119037628, "log_odds_ratio": -0.7485999464988708, "logits/chosen": -0.9002214074134827, "logits/rejected": -0.9186404943466187, "logps/chosen": -2.1985695362091064, "logps/rejected": -2.184818744659424, "loss": 1.6641, "nll_loss": 1.5892088413238525, "rewards/accuracies": 0.5, "rewards/chosen": -0.21985694766044617, "rewards/margins": -0.0013750800862908363, "rewards/rejected": -0.2184818834066391, "step": 507 }, { "epoch": 1.3908281998631074, "grad_norm": 3.88840389251709, "learning_rate": 9.304109589041096e-07, "log_odds_chosen": 0.5505133867263794, "log_odds_ratio": -0.4999428987503052, "logits/chosen": -0.8250694274902344, "logits/rejected": -0.9147200584411621, "logps/chosen": -2.667039155960083, "logps/rejected": -3.172266960144043, "loss": 1.6915, "nll_loss": 1.641514778137207, "rewards/accuracies": 0.75, "rewards/chosen": -0.26670390367507935, "rewards/margins": 0.050522804260253906, "rewards/rejected": -0.31722670793533325, "step": 508 }, { "epoch": 1.3935660506502396, "grad_norm": 4.299200534820557, "learning_rate": 9.302739726027397e-07, "log_odds_chosen": -0.4487556219100952, "log_odds_ratio": -1.0506232976913452, "logits/chosen": -0.9272090792655945, "logits/rejected": -0.9125601053237915, "logps/chosen": -2.6170523166656494, "logps/rejected": -2.178335666656494, "loss": 1.7746, "nll_loss": 1.6695667505264282, "rewards/accuracies": 0.5, "rewards/chosen": -0.261705219745636, "rewards/margins": -0.043871648609638214, "rewards/rejected": -0.21783357858657837, "step": 509 }, { "epoch": 1.3963039014373717, "grad_norm": 3.6639862060546875, "learning_rate": 9.301369863013698e-07, "log_odds_chosen": -0.013880699872970581, "log_odds_ratio": -0.7595995664596558, "logits/chosen": -0.8889558911323547, "logits/rejected": -0.8980950117111206, "logps/chosen": -2.271775245666504, "logps/rejected": -2.2616546154022217, "loss": 1.7365, "nll_loss": 1.6605690717697144, "rewards/accuracies": 0.5, "rewards/chosen": -0.22717750072479248, "rewards/margins": -0.0010120393708348274, "rewards/rejected": -0.22616548836231232, "step": 510 }, { "epoch": 1.3990417522245038, "grad_norm": 4.990373134613037, "learning_rate": 9.3e-07, "log_odds_chosen": -1.031476378440857, "log_odds_ratio": -1.405042290687561, "logits/chosen": -0.9062962532043457, "logits/rejected": -0.7831538319587708, "logps/chosen": -3.6725783348083496, "logps/rejected": -2.71738600730896, "loss": 1.8393, "nll_loss": 1.6988002061843872, "rewards/accuracies": 0.125, "rewards/chosen": -0.36725786328315735, "rewards/margins": -0.09551926702260971, "rewards/rejected": -0.27173858880996704, "step": 511 }, { "epoch": 1.4017796030116358, "grad_norm": 3.4425604343414307, "learning_rate": 9.298630136986301e-07, "log_odds_chosen": 0.8270271420478821, "log_odds_ratio": -0.5602179765701294, "logits/chosen": -0.8677992820739746, "logits/rejected": -0.9006636142730713, "logps/chosen": -2.3014674186706543, "logps/rejected": -3.052067518234253, "loss": 1.7203, "nll_loss": 1.6642487049102783, "rewards/accuracies": 0.75, "rewards/chosen": -0.23014673590660095, "rewards/margins": 0.07506004720926285, "rewards/rejected": -0.3052067756652832, "step": 512 }, { "epoch": 1.404517453798768, "grad_norm": 4.201154708862305, "learning_rate": 9.297260273972603e-07, "log_odds_chosen": 0.21525630354881287, "log_odds_ratio": -1.0495271682739258, "logits/chosen": -0.8818284273147583, "logits/rejected": -0.8752933740615845, "logps/chosen": -3.4890990257263184, "logps/rejected": -3.6249942779541016, "loss": 1.8056, "nll_loss": 1.7005984783172607, "rewards/accuracies": 0.5, "rewards/chosen": -0.3489098846912384, "rewards/margins": 0.013589546084403992, "rewards/rejected": -0.3624994158744812, "step": 513 }, { "epoch": 1.4072553045859002, "grad_norm": 4.8927788734436035, "learning_rate": 9.295890410958903e-07, "log_odds_chosen": -0.47814518213272095, "log_odds_ratio": -1.1860164403915405, "logits/chosen": -0.7660917043685913, "logits/rejected": -0.7574325799942017, "logps/chosen": -3.943988800048828, "logps/rejected": -3.451552152633667, "loss": 1.8186, "nll_loss": 1.7000269889831543, "rewards/accuracies": 0.25, "rewards/chosen": -0.39439889788627625, "rewards/margins": -0.04924365133047104, "rewards/rejected": -0.3451552391052246, "step": 514 }, { "epoch": 1.4099931553730323, "grad_norm": 3.644449234008789, "learning_rate": 9.294520547945205e-07, "log_odds_chosen": 0.3884121775627136, "log_odds_ratio": -0.5510567426681519, "logits/chosen": -0.818266749382019, "logits/rejected": -0.8421810269355774, "logps/chosen": -2.503685712814331, "logps/rejected": -2.8327903747558594, "loss": 1.6716, "nll_loss": 1.6164920330047607, "rewards/accuracies": 0.75, "rewards/chosen": -0.25036856532096863, "rewards/margins": 0.03291047364473343, "rewards/rejected": -0.28327906131744385, "step": 515 }, { "epoch": 1.4127310061601643, "grad_norm": 3.8435323238372803, "learning_rate": 9.293150684931507e-07, "log_odds_chosen": 0.2395631968975067, "log_odds_ratio": -0.670354962348938, "logits/chosen": -0.784360408782959, "logits/rejected": -0.9028178453445435, "logps/chosen": -2.119040012359619, "logps/rejected": -2.2812771797180176, "loss": 1.6071, "nll_loss": 1.540076494216919, "rewards/accuracies": 0.75, "rewards/chosen": -0.21190400421619415, "rewards/margins": 0.016223713755607605, "rewards/rejected": -0.22812771797180176, "step": 516 }, { "epoch": 1.4154688569472964, "grad_norm": 3.9238715171813965, "learning_rate": 9.291780821917807e-07, "log_odds_chosen": -0.8188405632972717, "log_odds_ratio": -1.414418339729309, "logits/chosen": -0.8541945219039917, "logits/rejected": -0.8252763152122498, "logps/chosen": -3.3732848167419434, "logps/rejected": -2.571725845336914, "loss": 1.8524, "nll_loss": 1.7109853029251099, "rewards/accuracies": 0.375, "rewards/chosen": -0.3373285233974457, "rewards/margins": -0.08015593886375427, "rewards/rejected": -0.2571725845336914, "step": 517 }, { "epoch": 1.4182067077344285, "grad_norm": 3.785876750946045, "learning_rate": 9.290410958904109e-07, "log_odds_chosen": 0.28682631254196167, "log_odds_ratio": -1.4342771768569946, "logits/chosen": -0.8064413666725159, "logits/rejected": -0.8688052892684937, "logps/chosen": -3.404484272003174, "logps/rejected": -3.5990705490112305, "loss": 1.7738, "nll_loss": 1.6303765773773193, "rewards/accuracies": 0.625, "rewards/chosen": -0.34044843912124634, "rewards/margins": 0.019458593800663948, "rewards/rejected": -0.35990703105926514, "step": 518 }, { "epoch": 1.4209445585215605, "grad_norm": 3.846534013748169, "learning_rate": 9.289041095890411e-07, "log_odds_chosen": 0.8000267744064331, "log_odds_ratio": -0.496675044298172, "logits/chosen": -0.8436944484710693, "logits/rejected": -0.8230317234992981, "logps/chosen": -2.339787244796753, "logps/rejected": -3.069023609161377, "loss": 1.7074, "nll_loss": 1.657724142074585, "rewards/accuracies": 0.875, "rewards/chosen": -0.233978733420372, "rewards/margins": 0.07292363792657852, "rewards/rejected": -0.30690237879753113, "step": 519 }, { "epoch": 1.4236824093086926, "grad_norm": 3.7666029930114746, "learning_rate": 9.287671232876712e-07, "log_odds_chosen": 0.14867888391017914, "log_odds_ratio": -0.7518683671951294, "logits/chosen": -0.8660462498664856, "logits/rejected": -0.8506151437759399, "logps/chosen": -2.3401541709899902, "logps/rejected": -2.491145372390747, "loss": 1.7512, "nll_loss": 1.6760447025299072, "rewards/accuracies": 0.25, "rewards/chosen": -0.23401544988155365, "rewards/margins": 0.01509907841682434, "rewards/rejected": -0.249114528298378, "step": 520 }, { "epoch": 1.4264202600958247, "grad_norm": 4.401301860809326, "learning_rate": 9.286301369863013e-07, "log_odds_chosen": -0.013869404792785645, "log_odds_ratio": -0.8027236461639404, "logits/chosen": -0.7567062377929688, "logits/rejected": -0.7904961705207825, "logps/chosen": -3.5311248302459717, "logps/rejected": -3.4922189712524414, "loss": 1.7212, "nll_loss": 1.6409626007080078, "rewards/accuracies": 0.5, "rewards/chosen": -0.35311245918273926, "rewards/margins": -0.0038905497640371323, "rewards/rejected": -0.3492219150066376, "step": 521 }, { "epoch": 1.4291581108829567, "grad_norm": 3.540255069732666, "learning_rate": 9.284931506849315e-07, "log_odds_chosen": 0.5477595925331116, "log_odds_ratio": -0.5445755124092102, "logits/chosen": -0.8567995429039001, "logits/rejected": -0.8596559762954712, "logps/chosen": -2.337132453918457, "logps/rejected": -2.8067853450775146, "loss": 1.668, "nll_loss": 1.6135904788970947, "rewards/accuracies": 0.875, "rewards/chosen": -0.23371323943138123, "rewards/margins": 0.04696529358625412, "rewards/rejected": -0.28067854046821594, "step": 522 }, { "epoch": 1.431895961670089, "grad_norm": 4.697103023529053, "learning_rate": 9.283561643835616e-07, "log_odds_chosen": -0.4828696548938751, "log_odds_ratio": -1.271402359008789, "logits/chosen": -0.836808443069458, "logits/rejected": -0.7931305170059204, "logps/chosen": -3.7632927894592285, "logps/rejected": -3.2816505432128906, "loss": 1.8076, "nll_loss": 1.6804676055908203, "rewards/accuracies": 0.25, "rewards/chosen": -0.37632930278778076, "rewards/margins": -0.04816422611474991, "rewards/rejected": -0.32816505432128906, "step": 523 }, { "epoch": 1.434633812457221, "grad_norm": 4.524424076080322, "learning_rate": 9.282191780821917e-07, "log_odds_chosen": -0.22550693154335022, "log_odds_ratio": -0.9495761394500732, "logits/chosen": -0.85089111328125, "logits/rejected": -0.8491012454032898, "logps/chosen": -3.0510921478271484, "logps/rejected": -2.8101439476013184, "loss": 1.713, "nll_loss": 1.6180351972579956, "rewards/accuracies": 0.5, "rewards/chosen": -0.3051092028617859, "rewards/margins": -0.024094829335808754, "rewards/rejected": -0.2810143828392029, "step": 524 }, { "epoch": 1.4373716632443532, "grad_norm": 4.425411701202393, "learning_rate": 9.280821917808219e-07, "log_odds_chosen": 0.7166216373443604, "log_odds_ratio": -0.8214325308799744, "logits/chosen": -0.9232723116874695, "logits/rejected": -0.9416404962539673, "logps/chosen": -3.362039804458618, "logps/rejected": -4.04764461517334, "loss": 1.7494, "nll_loss": 1.6672580242156982, "rewards/accuracies": 0.5, "rewards/chosen": -0.33620399236679077, "rewards/margins": 0.06856051087379456, "rewards/rejected": -0.40476447343826294, "step": 525 }, { "epoch": 1.4401095140314852, "grad_norm": 4.495201110839844, "learning_rate": 9.27945205479452e-07, "log_odds_chosen": 0.2560553252696991, "log_odds_ratio": -0.8042387366294861, "logits/chosen": -0.7933024168014526, "logits/rejected": -0.8620696663856506, "logps/chosen": -2.60150146484375, "logps/rejected": -2.7863354682922363, "loss": 1.7106, "nll_loss": 1.630152940750122, "rewards/accuracies": 0.625, "rewards/chosen": -0.26015013456344604, "rewards/margins": 0.018483391031622887, "rewards/rejected": -0.2786335349082947, "step": 526 }, { "epoch": 1.4428473648186173, "grad_norm": 3.745643138885498, "learning_rate": 9.278082191780822e-07, "log_odds_chosen": 0.09313017129898071, "log_odds_ratio": -0.8894457817077637, "logits/chosen": -0.8437995910644531, "logits/rejected": -0.9792207479476929, "logps/chosen": -2.7383298873901367, "logps/rejected": -2.7924790382385254, "loss": 1.7053, "nll_loss": 1.61637282371521, "rewards/accuracies": 0.375, "rewards/chosen": -0.2738329768180847, "rewards/margins": 0.005414949730038643, "rewards/rejected": -0.2792479395866394, "step": 527 }, { "epoch": 1.4455852156057496, "grad_norm": 3.4853084087371826, "learning_rate": 9.276712328767123e-07, "log_odds_chosen": 0.37598925828933716, "log_odds_ratio": -0.5525884032249451, "logits/chosen": -0.7648864984512329, "logits/rejected": -0.8057741522789001, "logps/chosen": -2.2374532222747803, "logps/rejected": -2.5453402996063232, "loss": 1.6347, "nll_loss": 1.5794461965560913, "rewards/accuracies": 0.625, "rewards/chosen": -0.22374533116817474, "rewards/margins": 0.030788715928792953, "rewards/rejected": -0.2545340657234192, "step": 528 }, { "epoch": 1.4483230663928817, "grad_norm": 3.7282795906066895, "learning_rate": 9.275342465753424e-07, "log_odds_chosen": 0.27318963408470154, "log_odds_ratio": -0.8222536444664001, "logits/chosen": -0.7845357656478882, "logits/rejected": -0.8266331553459167, "logps/chosen": -2.5717129707336426, "logps/rejected": -2.7756497859954834, "loss": 1.6711, "nll_loss": 1.588913917541504, "rewards/accuracies": 0.5, "rewards/chosen": -0.25717130303382874, "rewards/margins": 0.02039368264377117, "rewards/rejected": -0.27756500244140625, "step": 529 }, { "epoch": 1.4510609171800137, "grad_norm": 3.0236313343048096, "learning_rate": 9.273972602739726e-07, "log_odds_chosen": 0.49065178632736206, "log_odds_ratio": -0.6288679242134094, "logits/chosen": -0.6838557720184326, "logits/rejected": -0.8418615460395813, "logps/chosen": -1.815981388092041, "logps/rejected": -2.2293410301208496, "loss": 1.611, "nll_loss": 1.5481189489364624, "rewards/accuracies": 0.875, "rewards/chosen": -0.18159814178943634, "rewards/margins": 0.041335947811603546, "rewards/rejected": -0.2229340672492981, "step": 530 }, { "epoch": 1.4537987679671458, "grad_norm": 5.2621002197265625, "learning_rate": 9.272602739726026e-07, "log_odds_chosen": -1.0984959602355957, "log_odds_ratio": -1.8060479164123535, "logits/chosen": -0.8522779941558838, "logits/rejected": -0.8081815242767334, "logps/chosen": -3.62026047706604, "logps/rejected": -2.5543832778930664, "loss": 1.7961, "nll_loss": 1.6155016422271729, "rewards/accuracies": 0.375, "rewards/chosen": -0.36202603578567505, "rewards/margins": -0.106587715446949, "rewards/rejected": -0.25543832778930664, "step": 531 }, { "epoch": 1.4565366187542779, "grad_norm": 3.555750846862793, "learning_rate": 9.271232876712328e-07, "log_odds_chosen": 0.07950732111930847, "log_odds_ratio": -0.7506555318832397, "logits/chosen": -0.776656985282898, "logits/rejected": -0.8105055093765259, "logps/chosen": -2.7031238079071045, "logps/rejected": -2.7328691482543945, "loss": 1.6846, "nll_loss": 1.6095080375671387, "rewards/accuracies": 0.625, "rewards/chosen": -0.2703123986721039, "rewards/margins": 0.002974521368741989, "rewards/rejected": -0.273286908864975, "step": 532 }, { "epoch": 1.45927446954141, "grad_norm": 3.4792978763580322, "learning_rate": 9.26986301369863e-07, "log_odds_chosen": 0.2589919865131378, "log_odds_ratio": -0.6626994013786316, "logits/chosen": -0.8357898592948914, "logits/rejected": -0.8799583315849304, "logps/chosen": -1.9090118408203125, "logps/rejected": -2.1004629135131836, "loss": 1.6495, "nll_loss": 1.583275318145752, "rewards/accuracies": 0.625, "rewards/chosen": -0.19090117514133453, "rewards/margins": 0.019145138561725616, "rewards/rejected": -0.21004632115364075, "step": 533 }, { "epoch": 1.462012320328542, "grad_norm": 3.7721848487854004, "learning_rate": 9.268493150684931e-07, "log_odds_chosen": -0.6153131127357483, "log_odds_ratio": -1.1473515033721924, "logits/chosen": -0.8253940343856812, "logits/rejected": -0.8314513564109802, "logps/chosen": -3.4024038314819336, "logps/rejected": -2.809936761856079, "loss": 1.7858, "nll_loss": 1.6710689067840576, "rewards/accuracies": 0.25, "rewards/chosen": -0.3402404189109802, "rewards/margins": -0.059246726334095, "rewards/rejected": -0.28099367022514343, "step": 534 }, { "epoch": 1.464750171115674, "grad_norm": 3.415512800216675, "learning_rate": 9.267123287671232e-07, "log_odds_chosen": -0.18510191142559052, "log_odds_ratio": -0.939041793346405, "logits/chosen": -0.852548360824585, "logits/rejected": -0.8665869832038879, "logps/chosen": -2.3778347969055176, "logps/rejected": -2.1977996826171875, "loss": 1.667, "nll_loss": 1.5731284618377686, "rewards/accuracies": 0.5, "rewards/chosen": -0.23778346180915833, "rewards/margins": -0.018003514036536217, "rewards/rejected": -0.21977996826171875, "step": 535 }, { "epoch": 1.4674880219028064, "grad_norm": 4.539547443389893, "learning_rate": 9.265753424657534e-07, "log_odds_chosen": -0.3817055821418762, "log_odds_ratio": -1.1092042922973633, "logits/chosen": -0.8643949031829834, "logits/rejected": -0.775729775428772, "logps/chosen": -3.275320291519165, "logps/rejected": -2.905674695968628, "loss": 1.735, "nll_loss": 1.6240825653076172, "rewards/accuracies": 0.375, "rewards/chosen": -0.327532023191452, "rewards/margins": -0.03696456179022789, "rewards/rejected": -0.29056745767593384, "step": 536 }, { "epoch": 1.4702258726899384, "grad_norm": 3.4141013622283936, "learning_rate": 9.264383561643835e-07, "log_odds_chosen": -0.4476965665817261, "log_odds_ratio": -1.1316357851028442, "logits/chosen": -0.8854211568832397, "logits/rejected": -0.9214844703674316, "logps/chosen": -2.7423672676086426, "logps/rejected": -2.28735089302063, "loss": 1.7308, "nll_loss": 1.6176159381866455, "rewards/accuracies": 0.625, "rewards/chosen": -0.2742367088794708, "rewards/margins": -0.045501627027988434, "rewards/rejected": -0.228735089302063, "step": 537 }, { "epoch": 1.4729637234770705, "grad_norm": 3.1902644634246826, "learning_rate": 9.263013698630136e-07, "log_odds_chosen": 0.7677456140518188, "log_odds_ratio": -0.4558820128440857, "logits/chosen": -0.6703156232833862, "logits/rejected": -0.7905790209770203, "logps/chosen": -1.9429566860198975, "logps/rejected": -2.5883779525756836, "loss": 1.5586, "nll_loss": 1.512966275215149, "rewards/accuracies": 0.875, "rewards/chosen": -0.19429567456245422, "rewards/margins": 0.06454212963581085, "rewards/rejected": -0.25883781909942627, "step": 538 }, { "epoch": 1.4757015742642026, "grad_norm": 3.8232760429382324, "learning_rate": 9.261643835616438e-07, "log_odds_chosen": -0.05443370342254639, "log_odds_ratio": -1.0361380577087402, "logits/chosen": -0.9265299439430237, "logits/rejected": -0.9564539790153503, "logps/chosen": -3.7301135063171387, "logps/rejected": -3.6351332664489746, "loss": 1.7566, "nll_loss": 1.6529535055160522, "rewards/accuracies": 0.75, "rewards/chosen": -0.3730113208293915, "rewards/margins": -0.0094979926943779, "rewards/rejected": -0.363513320684433, "step": 539 }, { "epoch": 1.4784394250513346, "grad_norm": 3.68213152885437, "learning_rate": 9.260273972602739e-07, "log_odds_chosen": -0.21960169076919556, "log_odds_ratio": -0.9405016899108887, "logits/chosen": -0.8171294927597046, "logits/rejected": -0.807494580745697, "logps/chosen": -2.715670108795166, "logps/rejected": -2.471231460571289, "loss": 1.6385, "nll_loss": 1.5444839000701904, "rewards/accuracies": 0.625, "rewards/chosen": -0.27156704664230347, "rewards/margins": -0.0244438536465168, "rewards/rejected": -0.24712316691875458, "step": 540 }, { "epoch": 1.481177275838467, "grad_norm": 3.5983190536499023, "learning_rate": 9.258904109589041e-07, "log_odds_chosen": 0.5274028778076172, "log_odds_ratio": -0.5484914779663086, "logits/chosen": -0.7441479563713074, "logits/rejected": -0.6970102190971375, "logps/chosen": -2.1257572174072266, "logps/rejected": -2.6005125045776367, "loss": 1.7045, "nll_loss": 1.6496453285217285, "rewards/accuracies": 0.75, "rewards/chosen": -0.2125757336616516, "rewards/margins": 0.047475531697273254, "rewards/rejected": -0.26005128026008606, "step": 541 }, { "epoch": 1.483915126625599, "grad_norm": 3.7107295989990234, "learning_rate": 9.257534246575342e-07, "log_odds_chosen": 0.13345667719841003, "log_odds_ratio": -0.7560657262802124, "logits/chosen": -0.7936950922012329, "logits/rejected": -0.8879972696304321, "logps/chosen": -2.659447193145752, "logps/rejected": -2.7429656982421875, "loss": 1.6511, "nll_loss": 1.575494408607483, "rewards/accuracies": 0.625, "rewards/chosen": -0.2659447491168976, "rewards/margins": 0.008351840078830719, "rewards/rejected": -0.2742965817451477, "step": 542 }, { "epoch": 1.486652977412731, "grad_norm": 4.695859909057617, "learning_rate": 9.256164383561643e-07, "log_odds_chosen": -0.15349793434143066, "log_odds_ratio": -0.9840303659439087, "logits/chosen": -0.7649990916252136, "logits/rejected": -0.7743000388145447, "logps/chosen": -2.600445508956909, "logps/rejected": -2.3704869747161865, "loss": 1.621, "nll_loss": 1.5225886106491089, "rewards/accuracies": 0.5, "rewards/chosen": -0.26004451513290405, "rewards/margins": -0.0229958388954401, "rewards/rejected": -0.2370486855506897, "step": 543 }, { "epoch": 1.4893908281998631, "grad_norm": 4.423020839691162, "learning_rate": 9.254794520547945e-07, "log_odds_chosen": 0.22163720428943634, "log_odds_ratio": -0.8049583435058594, "logits/chosen": -0.7752357721328735, "logits/rejected": -0.7686829566955566, "logps/chosen": -3.0905303955078125, "logps/rejected": -3.3012585639953613, "loss": 1.6814, "nll_loss": 1.6009397506713867, "rewards/accuracies": 0.5, "rewards/chosen": -0.30905306339263916, "rewards/margins": 0.021072836592793465, "rewards/rejected": -0.3301258683204651, "step": 544 }, { "epoch": 1.4921286789869952, "grad_norm": 3.7904932498931885, "learning_rate": 9.253424657534245e-07, "log_odds_chosen": 0.2877681255340576, "log_odds_ratio": -0.5890829563140869, "logits/chosen": -0.8804981112480164, "logits/rejected": -0.9582110643386841, "logps/chosen": -2.6612606048583984, "logps/rejected": -2.890207052230835, "loss": 1.677, "nll_loss": 1.6180498600006104, "rewards/accuracies": 0.625, "rewards/chosen": -0.2661260664463043, "rewards/margins": 0.022894635796546936, "rewards/rejected": -0.28902071714401245, "step": 545 }, { "epoch": 1.4948665297741273, "grad_norm": 3.383103132247925, "learning_rate": 9.252054794520547e-07, "log_odds_chosen": 0.18611609935760498, "log_odds_ratio": -0.9748901128768921, "logits/chosen": -0.6839638948440552, "logits/rejected": -0.8008716106414795, "logps/chosen": -2.5115599632263184, "logps/rejected": -2.6279730796813965, "loss": 1.6503, "nll_loss": 1.5528274774551392, "rewards/accuracies": 0.75, "rewards/chosen": -0.2511560320854187, "rewards/margins": 0.011641278862953186, "rewards/rejected": -0.2627972960472107, "step": 546 }, { "epoch": 1.4976043805612593, "grad_norm": 3.5598273277282715, "learning_rate": 9.250684931506849e-07, "log_odds_chosen": 0.04546959698200226, "log_odds_ratio": -0.8121554255485535, "logits/chosen": -0.7363250851631165, "logits/rejected": -0.8025459051132202, "logps/chosen": -2.281111717224121, "logps/rejected": -2.2604422569274902, "loss": 1.6143, "nll_loss": 1.5330862998962402, "rewards/accuracies": 0.875, "rewards/chosen": -0.2281111776828766, "rewards/margins": -0.0020669307559728622, "rewards/rejected": -0.22604422271251678, "step": 547 }, { "epoch": 1.5003422313483914, "grad_norm": 4.131327152252197, "learning_rate": 9.249315068493149e-07, "log_odds_chosen": -0.5640161037445068, "log_odds_ratio": -1.2814770936965942, "logits/chosen": -0.8346433639526367, "logits/rejected": -0.8296773433685303, "logps/chosen": -3.1582775115966797, "logps/rejected": -2.562530994415283, "loss": 1.7617, "nll_loss": 1.633532166481018, "rewards/accuracies": 0.625, "rewards/chosen": -0.31582775712013245, "rewards/margins": -0.05957465618848801, "rewards/rejected": -0.25625309348106384, "step": 548 }, { "epoch": 1.5030800821355235, "grad_norm": 3.99031925201416, "learning_rate": 9.247945205479451e-07, "log_odds_chosen": -0.41863858699798584, "log_odds_ratio": -1.0368905067443848, "logits/chosen": -0.878351628780365, "logits/rejected": -0.8570705652236938, "logps/chosen": -2.9501965045928955, "logps/rejected": -2.551614284515381, "loss": 1.7408, "nll_loss": 1.637070894241333, "rewards/accuracies": 0.375, "rewards/chosen": -0.29501965641975403, "rewards/margins": -0.03985821455717087, "rewards/rejected": -0.25516146421432495, "step": 549 }, { "epoch": 1.5058179329226558, "grad_norm": 3.9832472801208496, "learning_rate": 9.246575342465753e-07, "log_odds_chosen": -0.31591174006462097, "log_odds_ratio": -0.9077785015106201, "logits/chosen": -0.8852339386940002, "logits/rejected": -0.8199284672737122, "logps/chosen": -2.5847392082214355, "logps/rejected": -2.3106536865234375, "loss": 1.7077, "nll_loss": 1.6169003248214722, "rewards/accuracies": 0.25, "rewards/chosen": -0.2584739327430725, "rewards/margins": -0.027408558875322342, "rewards/rejected": -0.23106539249420166, "step": 550 }, { "epoch": 1.5085557837097878, "grad_norm": 3.344264507293701, "learning_rate": 9.245205479452054e-07, "log_odds_chosen": -0.08402881026268005, "log_odds_ratio": -0.9150174856185913, "logits/chosen": -0.6542233228683472, "logits/rejected": -0.7211755514144897, "logps/chosen": -2.6931214332580566, "logps/rejected": -2.5607330799102783, "loss": 1.6366, "nll_loss": 1.5450977087020874, "rewards/accuracies": 0.75, "rewards/chosen": -0.26931217312812805, "rewards/margins": -0.013238860294222832, "rewards/rejected": -0.2560732960700989, "step": 551 }, { "epoch": 1.51129363449692, "grad_norm": 4.940526485443115, "learning_rate": 9.243835616438355e-07, "log_odds_chosen": -0.7384568452835083, "log_odds_ratio": -1.311078667640686, "logits/chosen": -0.820035457611084, "logits/rejected": -0.8142881989479065, "logps/chosen": -3.593928337097168, "logps/rejected": -2.856600761413574, "loss": 1.6782, "nll_loss": 1.5470705032348633, "rewards/accuracies": 0.25, "rewards/chosen": -0.35939285159111023, "rewards/margins": -0.07373277097940445, "rewards/rejected": -0.2856600880622864, "step": 552 }, { "epoch": 1.5140314852840522, "grad_norm": 4.053948879241943, "learning_rate": 9.242465753424657e-07, "log_odds_chosen": 0.07495987415313721, "log_odds_ratio": -0.7881327271461487, "logits/chosen": -0.8315345048904419, "logits/rejected": -0.8243609070777893, "logps/chosen": -2.8152072429656982, "logps/rejected": -2.8253819942474365, "loss": 1.5841, "nll_loss": 1.505296230316162, "rewards/accuracies": 0.625, "rewards/chosen": -0.2815207242965698, "rewards/margins": 0.0010174717754125595, "rewards/rejected": -0.28253820538520813, "step": 553 }, { "epoch": 1.5167693360711842, "grad_norm": 4.381509304046631, "learning_rate": 9.241095890410958e-07, "log_odds_chosen": -0.4595765471458435, "log_odds_ratio": -1.0806890726089478, "logits/chosen": -0.8127155303955078, "logits/rejected": -0.7404760122299194, "logps/chosen": -3.0812888145446777, "logps/rejected": -2.618879556655884, "loss": 1.7389, "nll_loss": 1.6308271884918213, "rewards/accuracies": 0.375, "rewards/chosen": -0.30812889337539673, "rewards/margins": -0.046240948140621185, "rewards/rejected": -0.26188796758651733, "step": 554 }, { "epoch": 1.5195071868583163, "grad_norm": 3.828805685043335, "learning_rate": 9.239726027397259e-07, "log_odds_chosen": 0.5043976306915283, "log_odds_ratio": -0.643047034740448, "logits/chosen": -0.8572917580604553, "logits/rejected": -0.9205783605575562, "logps/chosen": -3.023533344268799, "logps/rejected": -3.467132568359375, "loss": 1.6961, "nll_loss": 1.6317565441131592, "rewards/accuracies": 0.75, "rewards/chosen": -0.3023533225059509, "rewards/margins": 0.04435993731021881, "rewards/rejected": -0.34671324491500854, "step": 555 }, { "epoch": 1.5222450376454484, "grad_norm": 4.371698379516602, "learning_rate": 9.238356164383561e-07, "log_odds_chosen": -0.3891220688819885, "log_odds_ratio": -0.9942697882652283, "logits/chosen": -0.7744395732879639, "logits/rejected": -0.8330733776092529, "logps/chosen": -2.335480213165283, "logps/rejected": -1.956041932106018, "loss": 1.5995, "nll_loss": 1.5000938177108765, "rewards/accuracies": 0.375, "rewards/chosen": -0.23354803025722504, "rewards/margins": -0.03794383257627487, "rewards/rejected": -0.19560419023036957, "step": 556 }, { "epoch": 1.5249828884325805, "grad_norm": 3.7106685638427734, "learning_rate": 9.236986301369862e-07, "log_odds_chosen": -0.19335180521011353, "log_odds_ratio": -1.0621004104614258, "logits/chosen": -0.7693347930908203, "logits/rejected": -0.848639965057373, "logps/chosen": -3.1029653549194336, "logps/rejected": -2.8811845779418945, "loss": 1.6605, "nll_loss": 1.5543385744094849, "rewards/accuracies": 0.5, "rewards/chosen": -0.31029653549194336, "rewards/margins": -0.02217809297144413, "rewards/rejected": -0.288118451833725, "step": 557 }, { "epoch": 1.5277207392197125, "grad_norm": 3.841212511062622, "learning_rate": 9.235616438356164e-07, "log_odds_chosen": 0.15513789653778076, "log_odds_ratio": -0.6770759224891663, "logits/chosen": -0.8249895572662354, "logits/rejected": -0.8358516693115234, "logps/chosen": -2.3809378147125244, "logps/rejected": -2.5125036239624023, "loss": 1.5851, "nll_loss": 1.5174397230148315, "rewards/accuracies": 0.625, "rewards/chosen": -0.238093763589859, "rewards/margins": 0.013156596571207047, "rewards/rejected": -0.25125038623809814, "step": 558 }, { "epoch": 1.5304585900068446, "grad_norm": 3.454897165298462, "learning_rate": 9.234246575342465e-07, "log_odds_chosen": -0.37670791149139404, "log_odds_ratio": -1.013115644454956, "logits/chosen": -0.8093111515045166, "logits/rejected": -0.9047366976737976, "logps/chosen": -2.5890705585479736, "logps/rejected": -2.1931793689727783, "loss": 1.6468, "nll_loss": 1.5455259084701538, "rewards/accuracies": 0.625, "rewards/chosen": -0.2589070498943329, "rewards/margins": -0.03958909958600998, "rewards/rejected": -0.2193179726600647, "step": 559 }, { "epoch": 1.5331964407939767, "grad_norm": 4.983890533447266, "learning_rate": 9.232876712328766e-07, "log_odds_chosen": -0.6670010685920715, "log_odds_ratio": -1.2108700275421143, "logits/chosen": -0.8078041672706604, "logits/rejected": -0.7595615386962891, "logps/chosen": -3.880810499191284, "logps/rejected": -3.2164673805236816, "loss": 1.6889, "nll_loss": 1.5678292512893677, "rewards/accuracies": 0.375, "rewards/chosen": -0.38808107376098633, "rewards/margins": -0.06643432378768921, "rewards/rejected": -0.3216467499732971, "step": 560 }, { "epoch": 1.5359342915811087, "grad_norm": 3.9557244777679443, "learning_rate": 9.231506849315069e-07, "log_odds_chosen": 0.14371290802955627, "log_odds_ratio": -0.8408118486404419, "logits/chosen": -0.6622421741485596, "logits/rejected": -0.6293612122535706, "logps/chosen": -2.695791721343994, "logps/rejected": -2.806147575378418, "loss": 1.5834, "nll_loss": 1.499305009841919, "rewards/accuracies": 0.75, "rewards/chosen": -0.2695791721343994, "rewards/margins": 0.01103559136390686, "rewards/rejected": -0.2806147634983063, "step": 561 }, { "epoch": 1.5386721423682408, "grad_norm": 4.584468841552734, "learning_rate": 9.230136986301368e-07, "log_odds_chosen": -1.195691466331482, "log_odds_ratio": -1.871883511543274, "logits/chosen": -0.7675248384475708, "logits/rejected": -0.7564340829849243, "logps/chosen": -4.505504608154297, "logps/rejected": -3.305302619934082, "loss": 1.7358, "nll_loss": 1.5486509799957275, "rewards/accuracies": 0.375, "rewards/chosen": -0.45055049657821655, "rewards/margins": -0.12002024054527283, "rewards/rejected": -0.33053022623062134, "step": 562 }, { "epoch": 1.541409993155373, "grad_norm": 4.143622875213623, "learning_rate": 9.22876712328767e-07, "log_odds_chosen": -0.3032476603984833, "log_odds_ratio": -0.9738411903381348, "logits/chosen": -0.8990013599395752, "logits/rejected": -0.8677250146865845, "logps/chosen": -2.5746171474456787, "logps/rejected": -2.266345500946045, "loss": 1.5585, "nll_loss": 1.4611246585845947, "rewards/accuracies": 0.375, "rewards/chosen": -0.25746169686317444, "rewards/margins": -0.030827151611447334, "rewards/rejected": -0.22663456201553345, "step": 563 }, { "epoch": 1.5441478439425051, "grad_norm": 4.327102184295654, "learning_rate": 9.227397260273973e-07, "log_odds_chosen": -0.6671250462532043, "log_odds_ratio": -1.2456134557724, "logits/chosen": -0.8756723403930664, "logits/rejected": -0.8348996639251709, "logps/chosen": -3.2061612606048584, "logps/rejected": -2.5632429122924805, "loss": 1.7283, "nll_loss": 1.6037278175354004, "rewards/accuracies": 0.5, "rewards/chosen": -0.32061612606048584, "rewards/margins": -0.0642918199300766, "rewards/rejected": -0.25632429122924805, "step": 564 }, { "epoch": 1.5468856947296372, "grad_norm": 4.2100749015808105, "learning_rate": 9.226027397260274e-07, "log_odds_chosen": 0.3257005512714386, "log_odds_ratio": -0.7629778981208801, "logits/chosen": -0.813829779624939, "logits/rejected": -0.8635381460189819, "logps/chosen": -2.7038769721984863, "logps/rejected": -2.9986729621887207, "loss": 1.6491, "nll_loss": 1.5727763175964355, "rewards/accuracies": 0.5, "rewards/chosen": -0.2703877091407776, "rewards/margins": 0.02947959117591381, "rewards/rejected": -0.29986730217933655, "step": 565 }, { "epoch": 1.5496235455167693, "grad_norm": 3.5697178840637207, "learning_rate": 9.224657534246575e-07, "log_odds_chosen": 1.0469906330108643, "log_odds_ratio": -0.7885810732841492, "logits/chosen": -0.6884782910346985, "logits/rejected": -0.7127793431282043, "logps/chosen": -2.47456955909729, "logps/rejected": -3.3730735778808594, "loss": 1.5666, "nll_loss": 1.4877359867095947, "rewards/accuracies": 0.75, "rewards/chosen": -0.24745695292949677, "rewards/margins": 0.08985041081905365, "rewards/rejected": -0.3373073935508728, "step": 566 }, { "epoch": 1.5523613963039016, "grad_norm": 3.500093936920166, "learning_rate": 9.223287671232877e-07, "log_odds_chosen": 0.23311948776245117, "log_odds_ratio": -0.7190515995025635, "logits/chosen": -0.7875670194625854, "logits/rejected": -0.8107863664627075, "logps/chosen": -2.463074207305908, "logps/rejected": -2.632930278778076, "loss": 1.6381, "nll_loss": 1.5662204027175903, "rewards/accuracies": 0.75, "rewards/chosen": -0.24630743265151978, "rewards/margins": 0.016985639929771423, "rewards/rejected": -0.26329305768013, "step": 567 }, { "epoch": 1.5550992470910336, "grad_norm": 3.2691280841827393, "learning_rate": 9.221917808219178e-07, "log_odds_chosen": 0.48336148262023926, "log_odds_ratio": -0.5890149474143982, "logits/chosen": -0.8065637946128845, "logits/rejected": -0.8976701498031616, "logps/chosen": -2.11301589012146, "logps/rejected": -2.478161096572876, "loss": 1.5527, "nll_loss": 1.493821382522583, "rewards/accuracies": 0.75, "rewards/chosen": -0.21130160987377167, "rewards/margins": 0.036514513194561005, "rewards/rejected": -0.24781611561775208, "step": 568 }, { "epoch": 1.5578370978781657, "grad_norm": 3.8890552520751953, "learning_rate": 9.220547945205479e-07, "log_odds_chosen": 0.2949620187282562, "log_odds_ratio": -0.82621830701828, "logits/chosen": -0.7139371037483215, "logits/rejected": -0.743560254573822, "logps/chosen": -2.89048433303833, "logps/rejected": -3.1311275959014893, "loss": 1.6385, "nll_loss": 1.5559197664260864, "rewards/accuracies": 0.625, "rewards/chosen": -0.2890484631061554, "rewards/margins": 0.024064335972070694, "rewards/rejected": -0.3131127953529358, "step": 569 }, { "epoch": 1.5605749486652978, "grad_norm": 3.5431125164031982, "learning_rate": 9.219178082191781e-07, "log_odds_chosen": 0.6495088934898376, "log_odds_ratio": -0.49263668060302734, "logits/chosen": -0.676914632320404, "logits/rejected": -0.6965458393096924, "logps/chosen": -2.286825656890869, "logps/rejected": -2.8616888523101807, "loss": 1.4765, "nll_loss": 1.4272745847702026, "rewards/accuracies": 0.75, "rewards/chosen": -0.22868254780769348, "rewards/margins": 0.05748633295297623, "rewards/rejected": -0.2861689031124115, "step": 570 }, { "epoch": 1.5633127994524298, "grad_norm": 4.671868324279785, "learning_rate": 9.217808219178082e-07, "log_odds_chosen": -0.6655332446098328, "log_odds_ratio": -1.2061723470687866, "logits/chosen": -0.7858549356460571, "logits/rejected": -0.7038636207580566, "logps/chosen": -3.454758644104004, "logps/rejected": -2.796745777130127, "loss": 1.6104, "nll_loss": 1.4897469282150269, "rewards/accuracies": 0.25, "rewards/chosen": -0.3454758822917938, "rewards/margins": -0.06580128520727158, "rewards/rejected": -0.27967455983161926, "step": 571 }, { "epoch": 1.566050650239562, "grad_norm": 3.9026288986206055, "learning_rate": 9.216438356164384e-07, "log_odds_chosen": 0.18483488261699677, "log_odds_ratio": -0.8000360727310181, "logits/chosen": -0.8403186202049255, "logits/rejected": -0.7779797315597534, "logps/chosen": -2.543536424636841, "logps/rejected": -2.677605390548706, "loss": 1.5888, "nll_loss": 1.5088200569152832, "rewards/accuracies": 0.75, "rewards/chosen": -0.2543536424636841, "rewards/margins": 0.013406887650489807, "rewards/rejected": -0.2677605152130127, "step": 572 }, { "epoch": 1.568788501026694, "grad_norm": 4.507944583892822, "learning_rate": 9.215068493150685e-07, "log_odds_chosen": 0.09586147964000702, "log_odds_ratio": -0.8551784157752991, "logits/chosen": -0.6975966691970825, "logits/rejected": -0.7711385488510132, "logps/chosen": -2.607342004776001, "logps/rejected": -2.6473751068115234, "loss": 1.5235, "nll_loss": 1.4379793405532837, "rewards/accuracies": 0.5, "rewards/chosen": -0.2607342004776001, "rewards/margins": 0.004003304988145828, "rewards/rejected": -0.2647375166416168, "step": 573 }, { "epoch": 1.571526351813826, "grad_norm": 3.6944026947021484, "learning_rate": 9.213698630136986e-07, "log_odds_chosen": 0.32016804814338684, "log_odds_ratio": -0.6184468269348145, "logits/chosen": -0.7995092272758484, "logits/rejected": -0.8300684690475464, "logps/chosen": -1.992793321609497, "logps/rejected": -2.2189688682556152, "loss": 1.4934, "nll_loss": 1.4315078258514404, "rewards/accuracies": 0.625, "rewards/chosen": -0.19927933812141418, "rewards/margins": 0.02261757105588913, "rewards/rejected": -0.2218969166278839, "step": 574 }, { "epoch": 1.5742642026009581, "grad_norm": 3.798701047897339, "learning_rate": 9.212328767123288e-07, "log_odds_chosen": 1.1187779903411865, "log_odds_ratio": -0.521521806716919, "logits/chosen": -0.7374247312545776, "logits/rejected": -0.7641695737838745, "logps/chosen": -2.75607967376709, "logps/rejected": -3.821420669555664, "loss": 1.5663, "nll_loss": 1.5141067504882812, "rewards/accuracies": 0.625, "rewards/chosen": -0.2756079435348511, "rewards/margins": 0.10653415322303772, "rewards/rejected": -0.3821420967578888, "step": 575 }, { "epoch": 1.5770020533880902, "grad_norm": 4.294690132141113, "learning_rate": 9.210958904109588e-07, "log_odds_chosen": -0.3641813099384308, "log_odds_ratio": -1.001016616821289, "logits/chosen": -0.7258961200714111, "logits/rejected": -0.6960809230804443, "logps/chosen": -2.6156115531921387, "logps/rejected": -2.2514147758483887, "loss": 1.6108, "nll_loss": 1.5106745958328247, "rewards/accuracies": 0.375, "rewards/chosen": -0.26156115531921387, "rewards/margins": -0.03641967102885246, "rewards/rejected": -0.2251414954662323, "step": 576 }, { "epoch": 1.5797399041752225, "grad_norm": 3.2902677059173584, "learning_rate": 9.20958904109589e-07, "log_odds_chosen": 0.488048791885376, "log_odds_ratio": -0.5192888379096985, "logits/chosen": -0.8164684176445007, "logits/rejected": -0.8482871651649475, "logps/chosen": -2.1442198753356934, "logps/rejected": -2.561183452606201, "loss": 1.632, "nll_loss": 1.580117106437683, "rewards/accuracies": 0.75, "rewards/chosen": -0.21442201733589172, "rewards/margins": 0.04169635847210884, "rewards/rejected": -0.2561183571815491, "step": 577 }, { "epoch": 1.5824777549623545, "grad_norm": 4.044600963592529, "learning_rate": 9.208219178082192e-07, "log_odds_chosen": 0.6993350386619568, "log_odds_ratio": -0.44177109003067017, "logits/chosen": -0.7715595364570618, "logits/rejected": -0.7279488444328308, "logps/chosen": -2.01755952835083, "logps/rejected": -2.623823881149292, "loss": 1.4353, "nll_loss": 1.3910797834396362, "rewards/accuracies": 0.875, "rewards/chosen": -0.20175595581531525, "rewards/margins": 0.06062641739845276, "rewards/rejected": -0.2623823881149292, "step": 578 }, { "epoch": 1.5852156057494866, "grad_norm": 3.5155091285705566, "learning_rate": 9.206849315068493e-07, "log_odds_chosen": 0.04989083111286163, "log_odds_ratio": -0.7401337623596191, "logits/chosen": -0.7238362431526184, "logits/rejected": -0.7941597700119019, "logps/chosen": -2.4867303371429443, "logps/rejected": -2.504185914993286, "loss": 1.5665, "nll_loss": 1.4924976825714111, "rewards/accuracies": 0.5, "rewards/chosen": -0.24867302179336548, "rewards/margins": 0.0017455853521823883, "rewards/rejected": -0.25041860342025757, "step": 579 }, { "epoch": 1.587953456536619, "grad_norm": 3.550213098526001, "learning_rate": 9.205479452054794e-07, "log_odds_chosen": 0.008229821920394897, "log_odds_ratio": -0.7873997688293457, "logits/chosen": -0.7070971727371216, "logits/rejected": -0.7632534503936768, "logps/chosen": -2.73831844329834, "logps/rejected": -2.7092068195343018, "loss": 1.5173, "nll_loss": 1.4385645389556885, "rewards/accuracies": 0.625, "rewards/chosen": -0.273831844329834, "rewards/margins": -0.002911170944571495, "rewards/rejected": -0.27092069387435913, "step": 580 }, { "epoch": 1.590691307323751, "grad_norm": 4.089909076690674, "learning_rate": 9.204109589041096e-07, "log_odds_chosen": 0.11993440985679626, "log_odds_ratio": -0.8325083255767822, "logits/chosen": -0.8116704821586609, "logits/rejected": -0.7873951196670532, "logps/chosen": -2.9990346431732178, "logps/rejected": -3.1007080078125, "loss": 1.5617, "nll_loss": 1.4784830808639526, "rewards/accuracies": 0.625, "rewards/chosen": -0.2999034821987152, "rewards/margins": 0.010167326778173447, "rewards/rejected": -0.31007078289985657, "step": 581 }, { "epoch": 1.593429158110883, "grad_norm": 3.504678249359131, "learning_rate": 9.202739726027397e-07, "log_odds_chosen": 0.5713675618171692, "log_odds_ratio": -0.49547359347343445, "logits/chosen": -0.8169533014297485, "logits/rejected": -0.8601247072219849, "logps/chosen": -2.3086490631103516, "logps/rejected": -2.8208274841308594, "loss": 1.5029, "nll_loss": 1.453303575515747, "rewards/accuracies": 0.875, "rewards/chosen": -0.23086491227149963, "rewards/margins": 0.051217854022979736, "rewards/rejected": -0.28208276629447937, "step": 582 }, { "epoch": 1.596167008898015, "grad_norm": 4.522462844848633, "learning_rate": 9.201369863013698e-07, "log_odds_chosen": -0.6159318685531616, "log_odds_ratio": -1.2470253705978394, "logits/chosen": -0.7617253065109253, "logits/rejected": -0.7954643964767456, "logps/chosen": -3.2007803916931152, "logps/rejected": -2.595381498336792, "loss": 1.5938, "nll_loss": 1.469145655632019, "rewards/accuracies": 0.375, "rewards/chosen": -0.320078045129776, "rewards/margins": -0.06053987890481949, "rewards/rejected": -0.2595381438732147, "step": 583 }, { "epoch": 1.5989048596851472, "grad_norm": 4.373702049255371, "learning_rate": 9.2e-07, "log_odds_chosen": -0.041272684931755066, "log_odds_ratio": -1.0736088752746582, "logits/chosen": -0.7142125368118286, "logits/rejected": -0.7287464737892151, "logps/chosen": -3.0683505535125732, "logps/rejected": -3.0017645359039307, "loss": 1.5976, "nll_loss": 1.4902600049972534, "rewards/accuracies": 0.5, "rewards/chosen": -0.3068350553512573, "rewards/margins": -0.006658583879470825, "rewards/rejected": -0.3001764714717865, "step": 584 }, { "epoch": 1.6016427104722792, "grad_norm": 3.8715949058532715, "learning_rate": 9.198630136986301e-07, "log_odds_chosen": 0.2519274353981018, "log_odds_ratio": -0.6454613208770752, "logits/chosen": -0.6190153360366821, "logits/rejected": -0.6376731991767883, "logps/chosen": -2.1988649368286133, "logps/rejected": -2.3845765590667725, "loss": 1.503, "nll_loss": 1.438428521156311, "rewards/accuracies": 0.625, "rewards/chosen": -0.21988651156425476, "rewards/margins": 0.018571151420474052, "rewards/rejected": -0.23845766484737396, "step": 585 }, { "epoch": 1.6043805612594113, "grad_norm": 4.905228137969971, "learning_rate": 9.197260273972603e-07, "log_odds_chosen": -0.9957119822502136, "log_odds_ratio": -1.8930413722991943, "logits/chosen": -0.6997741460800171, "logits/rejected": -0.6881327629089355, "logps/chosen": -4.582285404205322, "logps/rejected": -3.583275556564331, "loss": 1.7259, "nll_loss": 1.536604404449463, "rewards/accuracies": 0.5, "rewards/chosen": -0.45822858810424805, "rewards/margins": -0.09990101307630539, "rewards/rejected": -0.35832756757736206, "step": 586 }, { "epoch": 1.6071184120465434, "grad_norm": 4.118900299072266, "learning_rate": 9.195890410958904e-07, "log_odds_chosen": -0.33002904057502747, "log_odds_ratio": -0.9660720825195312, "logits/chosen": -0.7134832739830017, "logits/rejected": -0.6938896179199219, "logps/chosen": -3.190868854522705, "logps/rejected": -2.8345651626586914, "loss": 1.6074, "nll_loss": 1.5107998847961426, "rewards/accuracies": 0.5, "rewards/chosen": -0.31908687949180603, "rewards/margins": -0.03563036024570465, "rewards/rejected": -0.2834565341472626, "step": 587 }, { "epoch": 1.6098562628336754, "grad_norm": 3.7450411319732666, "learning_rate": 9.194520547945205e-07, "log_odds_chosen": 0.750029444694519, "log_odds_ratio": -0.570886492729187, "logits/chosen": -0.652167022228241, "logits/rejected": -0.6521034836769104, "logps/chosen": -2.180447578430176, "logps/rejected": -2.853379726409912, "loss": 1.5296, "nll_loss": 1.4725545644760132, "rewards/accuracies": 0.75, "rewards/chosen": -0.21804475784301758, "rewards/margins": 0.06729321926832199, "rewards/rejected": -0.28533798456192017, "step": 588 }, { "epoch": 1.6125941136208075, "grad_norm": 4.066911220550537, "learning_rate": 9.193150684931507e-07, "log_odds_chosen": -0.27220797538757324, "log_odds_ratio": -1.0675938129425049, "logits/chosen": -0.6305994391441345, "logits/rejected": -0.6714895963668823, "logps/chosen": -2.814018964767456, "logps/rejected": -2.4820032119750977, "loss": 1.6394, "nll_loss": 1.5325958728790283, "rewards/accuracies": 0.5, "rewards/chosen": -0.2814019024372101, "rewards/margins": -0.033201564103364944, "rewards/rejected": -0.24820032715797424, "step": 589 }, { "epoch": 1.6153319644079398, "grad_norm": 3.326571226119995, "learning_rate": 9.191780821917808e-07, "log_odds_chosen": -0.12957851588726044, "log_odds_ratio": -0.8420898914337158, "logits/chosen": -0.7052600383758545, "logits/rejected": -0.7798614501953125, "logps/chosen": -3.162468910217285, "logps/rejected": -3.0146279335021973, "loss": 1.5724, "nll_loss": 1.4882258176803589, "rewards/accuracies": 0.25, "rewards/chosen": -0.316246896982193, "rewards/margins": -0.014784112572669983, "rewards/rejected": -0.3014627695083618, "step": 590 }, { "epoch": 1.6180698151950719, "grad_norm": 3.6265389919281006, "learning_rate": 9.190410958904109e-07, "log_odds_chosen": 0.14518776535987854, "log_odds_ratio": -0.8386099338531494, "logits/chosen": -0.7117173075675964, "logits/rejected": -0.727399468421936, "logps/chosen": -2.8637170791625977, "logps/rejected": -2.9712038040161133, "loss": 1.557, "nll_loss": 1.4731736183166504, "rewards/accuracies": 0.5, "rewards/chosen": -0.28637170791625977, "rewards/margins": 0.010748691856861115, "rewards/rejected": -0.2971203923225403, "step": 591 }, { "epoch": 1.620807665982204, "grad_norm": 3.858430862426758, "learning_rate": 9.189041095890411e-07, "log_odds_chosen": 0.4529445171356201, "log_odds_ratio": -0.7423229217529297, "logits/chosen": -0.7392657995223999, "logits/rejected": -0.7863273620605469, "logps/chosen": -2.8273820877075195, "logps/rejected": -3.2673652172088623, "loss": 1.5847, "nll_loss": 1.5104562044143677, "rewards/accuracies": 0.625, "rewards/chosen": -0.28273820877075195, "rewards/margins": 0.043998315930366516, "rewards/rejected": -0.3267365097999573, "step": 592 }, { "epoch": 1.6235455167693362, "grad_norm": 4.449395179748535, "learning_rate": 9.187671232876712e-07, "log_odds_chosen": -0.4263470470905304, "log_odds_ratio": -0.9571019411087036, "logits/chosen": -0.7090526819229126, "logits/rejected": -0.6126908659934998, "logps/chosen": -3.34026837348938, "logps/rejected": -2.9479994773864746, "loss": 1.5894, "nll_loss": 1.4937182664871216, "rewards/accuracies": 0.125, "rewards/chosen": -0.33402684330940247, "rewards/margins": -0.03922690078616142, "rewards/rejected": -0.29479992389678955, "step": 593 }, { "epoch": 1.6262833675564683, "grad_norm": 5.124985694885254, "learning_rate": 9.186301369863013e-07, "log_odds_chosen": -0.6700010895729065, "log_odds_ratio": -1.2891101837158203, "logits/chosen": -0.6659747958183289, "logits/rejected": -0.6148614883422852, "logps/chosen": -3.513915777206421, "logps/rejected": -2.9051735401153564, "loss": 1.5519, "nll_loss": 1.4229556322097778, "rewards/accuracies": 0.25, "rewards/chosen": -0.3513915538787842, "rewards/margins": -0.060874223709106445, "rewards/rejected": -0.29051733016967773, "step": 594 }, { "epoch": 1.6290212183436004, "grad_norm": 3.6462035179138184, "learning_rate": 9.184931506849315e-07, "log_odds_chosen": 0.29658907651901245, "log_odds_ratio": -0.5641794204711914, "logits/chosen": -0.7188773155212402, "logits/rejected": -0.691906213760376, "logps/chosen": -1.5908652544021606, "logps/rejected": -1.8187446594238281, "loss": 1.4097, "nll_loss": 1.3532960414886475, "rewards/accuracies": 0.75, "rewards/chosen": -0.15908654034137726, "rewards/margins": 0.022787917405366898, "rewards/rejected": -0.18187445402145386, "step": 595 }, { "epoch": 1.6317590691307324, "grad_norm": 4.06154727935791, "learning_rate": 9.183561643835616e-07, "log_odds_chosen": -0.21319645643234253, "log_odds_ratio": -0.879133403301239, "logits/chosen": -0.7352735996246338, "logits/rejected": -0.6969001293182373, "logps/chosen": -2.6231882572174072, "logps/rejected": -2.414412021636963, "loss": 1.4985, "nll_loss": 1.4105453491210938, "rewards/accuracies": 0.5, "rewards/chosen": -0.26231884956359863, "rewards/margins": -0.020877614617347717, "rewards/rejected": -0.24144122004508972, "step": 596 }, { "epoch": 1.6344969199178645, "grad_norm": 3.453672170639038, "learning_rate": 9.182191780821917e-07, "log_odds_chosen": -0.19986435770988464, "log_odds_ratio": -0.9235107898712158, "logits/chosen": -0.6453060507774353, "logits/rejected": -0.6348427534103394, "logps/chosen": -2.2518858909606934, "logps/rejected": -2.0438623428344727, "loss": 1.5492, "nll_loss": 1.4568936824798584, "rewards/accuracies": 0.75, "rewards/chosen": -0.22518858313560486, "rewards/margins": -0.02080235630273819, "rewards/rejected": -0.20438623428344727, "step": 597 }, { "epoch": 1.6372347707049966, "grad_norm": 3.74711275100708, "learning_rate": 9.180821917808219e-07, "log_odds_chosen": 0.3573321998119354, "log_odds_ratio": -0.5739903450012207, "logits/chosen": -0.6813241243362427, "logits/rejected": -0.6697196960449219, "logps/chosen": -2.686086893081665, "logps/rejected": -2.992563009262085, "loss": 1.4741, "nll_loss": 1.4167392253875732, "rewards/accuracies": 0.5, "rewards/chosen": -0.2686086893081665, "rewards/margins": 0.030647605657577515, "rewards/rejected": -0.299256294965744, "step": 598 }, { "epoch": 1.6399726214921286, "grad_norm": 4.230123996734619, "learning_rate": 9.17945205479452e-07, "log_odds_chosen": -0.17626570165157318, "log_odds_ratio": -0.9411473870277405, "logits/chosen": -0.7360097169876099, "logits/rejected": -0.7512553334236145, "logps/chosen": -2.5025503635406494, "logps/rejected": -2.2447922229766846, "loss": 1.4697, "nll_loss": 1.3756102323532104, "rewards/accuracies": 0.5, "rewards/chosen": -0.2502550482749939, "rewards/margins": -0.025775814428925514, "rewards/rejected": -0.22447922825813293, "step": 599 }, { "epoch": 1.6427104722792607, "grad_norm": 4.094789028167725, "learning_rate": 9.178082191780822e-07, "log_odds_chosen": 0.5635699033737183, "log_odds_ratio": -0.6863358020782471, "logits/chosen": -0.696662962436676, "logits/rejected": -0.7043269872665405, "logps/chosen": -2.6054391860961914, "logps/rejected": -3.108402729034424, "loss": 1.469, "nll_loss": 1.4003431797027588, "rewards/accuracies": 0.625, "rewards/chosen": -0.26054391264915466, "rewards/margins": 0.050296343863010406, "rewards/rejected": -0.31084027886390686, "step": 600 }, { "epoch": 1.6454483230663928, "grad_norm": 4.311732292175293, "learning_rate": 9.176712328767123e-07, "log_odds_chosen": -0.5276241302490234, "log_odds_ratio": -1.0532872676849365, "logits/chosen": -0.7500609755516052, "logits/rejected": -0.6843471527099609, "logps/chosen": -2.6880760192871094, "logps/rejected": -2.2173011302948, "loss": 1.5036, "nll_loss": 1.3983173370361328, "rewards/accuracies": 0.25, "rewards/chosen": -0.2688075602054596, "rewards/margins": -0.047077469527721405, "rewards/rejected": -0.22173011302947998, "step": 601 }, { "epoch": 1.6481861738535248, "grad_norm": 3.7573094367980957, "learning_rate": 9.175342465753424e-07, "log_odds_chosen": -0.4007585942745209, "log_odds_ratio": -1.0246005058288574, "logits/chosen": -0.7069658041000366, "logits/rejected": -0.7453099489212036, "logps/chosen": -3.1686384677886963, "logps/rejected": -2.7793447971343994, "loss": 1.5751, "nll_loss": 1.4726855754852295, "rewards/accuracies": 0.25, "rewards/chosen": -0.31686389446258545, "rewards/margins": -0.03892941027879715, "rewards/rejected": -0.2779344618320465, "step": 602 }, { "epoch": 1.6509240246406571, "grad_norm": 4.832777500152588, "learning_rate": 9.173972602739726e-07, "log_odds_chosen": -0.18980330228805542, "log_odds_ratio": -0.989465594291687, "logits/chosen": -0.5663728713989258, "logits/rejected": -0.6120015382766724, "logps/chosen": -3.2094271183013916, "logps/rejected": -2.962949514389038, "loss": 1.5148, "nll_loss": 1.4158611297607422, "rewards/accuracies": 0.5, "rewards/chosen": -0.3209426999092102, "rewards/margins": -0.024647772312164307, "rewards/rejected": -0.2962949275970459, "step": 603 }, { "epoch": 1.6536618754277892, "grad_norm": 3.5884525775909424, "learning_rate": 9.172602739726027e-07, "log_odds_chosen": 0.6425407528877258, "log_odds_ratio": -0.7733391523361206, "logits/chosen": -0.7062439918518066, "logits/rejected": -0.6925845146179199, "logps/chosen": -2.2770094871520996, "logps/rejected": -2.8653950691223145, "loss": 1.5057, "nll_loss": 1.4283441305160522, "rewards/accuracies": 0.625, "rewards/chosen": -0.22770094871520996, "rewards/margins": 0.05883856862783432, "rewards/rejected": -0.2865395247936249, "step": 604 }, { "epoch": 1.6563997262149213, "grad_norm": 4.436399936676025, "learning_rate": 9.171232876712328e-07, "log_odds_chosen": -0.2624358832836151, "log_odds_ratio": -1.0210434198379517, "logits/chosen": -0.6710184216499329, "logits/rejected": -0.6105521321296692, "logps/chosen": -2.7021124362945557, "logps/rejected": -2.4489758014678955, "loss": 1.4579, "nll_loss": 1.3558156490325928, "rewards/accuracies": 0.625, "rewards/chosen": -0.27021124958992004, "rewards/margins": -0.025313671678304672, "rewards/rejected": -0.24489757418632507, "step": 605 }, { "epoch": 1.6591375770020536, "grad_norm": 3.4602296352386475, "learning_rate": 9.16986301369863e-07, "log_odds_chosen": 0.45017337799072266, "log_odds_ratio": -0.5959828495979309, "logits/chosen": -0.6766421794891357, "logits/rejected": -0.6961017847061157, "logps/chosen": -1.6809386014938354, "logps/rejected": -2.032810926437378, "loss": 1.4294, "nll_loss": 1.3698402643203735, "rewards/accuracies": 0.625, "rewards/chosen": -0.16809387505054474, "rewards/margins": 0.03518722578883171, "rewards/rejected": -0.20328110456466675, "step": 606 }, { "epoch": 1.6618754277891856, "grad_norm": 3.4823291301727295, "learning_rate": 9.168493150684931e-07, "log_odds_chosen": 0.24867822229862213, "log_odds_ratio": -0.7579395771026611, "logits/chosen": -0.6294485330581665, "logits/rejected": -0.6563166379928589, "logps/chosen": -2.419281005859375, "logps/rejected": -2.618391990661621, "loss": 1.4772, "nll_loss": 1.4014054536819458, "rewards/accuracies": 0.5, "rewards/chosen": -0.2419281005859375, "rewards/margins": 0.01991110108792782, "rewards/rejected": -0.26183921098709106, "step": 607 }, { "epoch": 1.6646132785763177, "grad_norm": 3.902769088745117, "learning_rate": 9.167123287671232e-07, "log_odds_chosen": -0.3697505593299866, "log_odds_ratio": -1.0742870569229126, "logits/chosen": -0.6053754091262817, "logits/rejected": -0.5794192552566528, "logps/chosen": -3.0772817134857178, "logps/rejected": -2.674452781677246, "loss": 1.6209, "nll_loss": 1.513441801071167, "rewards/accuracies": 0.5, "rewards/chosen": -0.30772820115089417, "rewards/margins": -0.04028291255235672, "rewards/rejected": -0.26744526624679565, "step": 608 }, { "epoch": 1.6673511293634498, "grad_norm": 3.8635919094085693, "learning_rate": 9.165753424657534e-07, "log_odds_chosen": 0.5956275463104248, "log_odds_ratio": -0.5162099003791809, "logits/chosen": -0.7259917855262756, "logits/rejected": -0.7540243864059448, "logps/chosen": -2.5552635192871094, "logps/rejected": -3.1083321571350098, "loss": 1.448, "nll_loss": 1.396370768547058, "rewards/accuracies": 0.75, "rewards/chosen": -0.2555263638496399, "rewards/margins": 0.05530688911676407, "rewards/rejected": -0.310833215713501, "step": 609 }, { "epoch": 1.6700889801505818, "grad_norm": 4.227957248687744, "learning_rate": 9.164383561643835e-07, "log_odds_chosen": 0.6942138075828552, "log_odds_ratio": -0.5231826901435852, "logits/chosen": -0.63584965467453, "logits/rejected": -0.5767515897750854, "logps/chosen": -2.133423328399658, "logps/rejected": -2.708317518234253, "loss": 1.4409, "nll_loss": 1.3885776996612549, "rewards/accuracies": 0.75, "rewards/chosen": -0.2133423388004303, "rewards/margins": 0.05748940259218216, "rewards/rejected": -0.27083176374435425, "step": 610 }, { "epoch": 1.672826830937714, "grad_norm": 3.274143695831299, "learning_rate": 9.163013698630136e-07, "log_odds_chosen": 0.06788966059684753, "log_odds_ratio": -0.7142007350921631, "logits/chosen": -0.6956030130386353, "logits/rejected": -0.7689128518104553, "logps/chosen": -2.253411293029785, "logps/rejected": -2.2891159057617188, "loss": 1.5123, "nll_loss": 1.4408681392669678, "rewards/accuracies": 0.625, "rewards/chosen": -0.22534114122390747, "rewards/margins": 0.003570450469851494, "rewards/rejected": -0.22891157865524292, "step": 611 }, { "epoch": 1.675564681724846, "grad_norm": 4.121723651885986, "learning_rate": 9.161643835616438e-07, "log_odds_chosen": -0.740554928779602, "log_odds_ratio": -1.2362923622131348, "logits/chosen": -0.602120578289032, "logits/rejected": -0.5539164543151855, "logps/chosen": -2.8788766860961914, "logps/rejected": -2.187525749206543, "loss": 1.506, "nll_loss": 1.3824132680892944, "rewards/accuracies": 0.375, "rewards/chosen": -0.28788766264915466, "rewards/margins": -0.06913506984710693, "rewards/rejected": -0.21875259280204773, "step": 612 }, { "epoch": 1.678302532511978, "grad_norm": 4.842816352844238, "learning_rate": 9.160273972602739e-07, "log_odds_chosen": -0.5459595918655396, "log_odds_ratio": -1.2378363609313965, "logits/chosen": -0.6469624638557434, "logits/rejected": -0.6185622811317444, "logps/chosen": -3.3441028594970703, "logps/rejected": -2.787259578704834, "loss": 1.5504, "nll_loss": 1.4265693426132202, "rewards/accuracies": 0.5, "rewards/chosen": -0.33441027998924255, "rewards/margins": -0.05568433180451393, "rewards/rejected": -0.2787259519100189, "step": 613 }, { "epoch": 1.68104038329911, "grad_norm": 3.5958609580993652, "learning_rate": 9.158904109589041e-07, "log_odds_chosen": -0.1040971428155899, "log_odds_ratio": -0.9252364039421082, "logits/chosen": -0.7345307469367981, "logits/rejected": -0.7606532573699951, "logps/chosen": -2.387450695037842, "logps/rejected": -2.288982391357422, "loss": 1.4844, "nll_loss": 1.391880989074707, "rewards/accuracies": 0.625, "rewards/chosen": -0.2387450933456421, "rewards/margins": -0.009846853092312813, "rewards/rejected": -0.22889822721481323, "step": 614 }, { "epoch": 1.6837782340862422, "grad_norm": 3.544332981109619, "learning_rate": 9.157534246575342e-07, "log_odds_chosen": 0.2068464308977127, "log_odds_ratio": -0.7220398187637329, "logits/chosen": -0.7250076532363892, "logits/rejected": -0.7557945251464844, "logps/chosen": -2.5042295455932617, "logps/rejected": -2.6775269508361816, "loss": 1.5345, "nll_loss": 1.4623427391052246, "rewards/accuracies": 0.75, "rewards/chosen": -0.25042295455932617, "rewards/margins": 0.017329735681414604, "rewards/rejected": -0.2677527070045471, "step": 615 }, { "epoch": 1.6865160848733745, "grad_norm": 2.8711612224578857, "learning_rate": 9.156164383561643e-07, "log_odds_chosen": 1.0309929847717285, "log_odds_ratio": -0.4840540587902069, "logits/chosen": -0.5974233150482178, "logits/rejected": -0.7395235300064087, "logps/chosen": -1.885789155960083, "logps/rejected": -2.78650164604187, "loss": 1.4581, "nll_loss": 1.4096460342407227, "rewards/accuracies": 0.75, "rewards/chosen": -0.18857893347740173, "rewards/margins": 0.09007124602794647, "rewards/rejected": -0.278650164604187, "step": 616 }, { "epoch": 1.6892539356605065, "grad_norm": 3.8400096893310547, "learning_rate": 9.154794520547945e-07, "log_odds_chosen": 0.02273094654083252, "log_odds_ratio": -0.8851383924484253, "logits/chosen": -0.5458869338035583, "logits/rejected": -0.5522079467773438, "logps/chosen": -2.7392220497131348, "logps/rejected": -2.6841320991516113, "loss": 1.484, "nll_loss": 1.3954641819000244, "rewards/accuracies": 0.75, "rewards/chosen": -0.2739222049713135, "rewards/margins": -0.005508970469236374, "rewards/rejected": -0.2684132158756256, "step": 617 }, { "epoch": 1.6919917864476386, "grad_norm": 3.3086562156677246, "learning_rate": 9.153424657534246e-07, "log_odds_chosen": 0.11355168372392654, "log_odds_ratio": -0.7929800152778625, "logits/chosen": -0.6522195935249329, "logits/rejected": -0.6731007099151611, "logps/chosen": -2.373101234436035, "logps/rejected": -2.4329237937927246, "loss": 1.4997, "nll_loss": 1.4204145669937134, "rewards/accuracies": 0.75, "rewards/chosen": -0.23731011152267456, "rewards/margins": 0.005982253700494766, "rewards/rejected": -0.24329236149787903, "step": 618 }, { "epoch": 1.6947296372347707, "grad_norm": 4.281066417694092, "learning_rate": 9.152054794520547e-07, "log_odds_chosen": -0.08669845759868622, "log_odds_ratio": -0.8985964059829712, "logits/chosen": -0.7393370866775513, "logits/rejected": -0.7001340985298157, "logps/chosen": -2.644913911819458, "logps/rejected": -2.5187697410583496, "loss": 1.4791, "nll_loss": 1.3892379999160767, "rewards/accuracies": 0.75, "rewards/chosen": -0.26449140906333923, "rewards/margins": -0.012614430859684944, "rewards/rejected": -0.25187698006629944, "step": 619 }, { "epoch": 1.697467488021903, "grad_norm": 4.124050140380859, "learning_rate": 9.150684931506849e-07, "log_odds_chosen": -0.15326935052871704, "log_odds_ratio": -0.8454866409301758, "logits/chosen": -0.650210976600647, "logits/rejected": -0.6190853714942932, "logps/chosen": -2.826958656311035, "logps/rejected": -2.651381731033325, "loss": 1.5315, "nll_loss": 1.446921706199646, "rewards/accuracies": 0.5, "rewards/chosen": -0.28269585967063904, "rewards/margins": -0.017557689920067787, "rewards/rejected": -0.265138179063797, "step": 620 }, { "epoch": 1.700205338809035, "grad_norm": 3.729189157485962, "learning_rate": 9.149315068493151e-07, "log_odds_chosen": 1.030922770500183, "log_odds_ratio": -0.48139268159866333, "logits/chosen": -0.6409726142883301, "logits/rejected": -0.6767932176589966, "logps/chosen": -2.044100284576416, "logps/rejected": -2.912886142730713, "loss": 1.4096, "nll_loss": 1.361493706703186, "rewards/accuracies": 0.875, "rewards/chosen": -0.20441001653671265, "rewards/margins": 0.08687859028577805, "rewards/rejected": -0.2912886142730713, "step": 621 }, { "epoch": 1.702943189596167, "grad_norm": 3.8529555797576904, "learning_rate": 9.147945205479451e-07, "log_odds_chosen": -0.2021614909172058, "log_odds_ratio": -1.0965722799301147, "logits/chosen": -0.6963780522346497, "logits/rejected": -0.6912370324134827, "logps/chosen": -2.8408854007720947, "logps/rejected": -2.6141769886016846, "loss": 1.5756, "nll_loss": 1.465956211090088, "rewards/accuracies": 0.5, "rewards/chosen": -0.2840885519981384, "rewards/margins": -0.022670840844511986, "rewards/rejected": -0.2614176869392395, "step": 622 }, { "epoch": 1.7056810403832992, "grad_norm": 3.515683650970459, "learning_rate": 9.146575342465753e-07, "log_odds_chosen": 0.4936247766017914, "log_odds_ratio": -0.5352454781532288, "logits/chosen": -0.669935941696167, "logits/rejected": -0.7031093239784241, "logps/chosen": -2.2538323402404785, "logps/rejected": -2.691709518432617, "loss": 1.4901, "nll_loss": 1.4365787506103516, "rewards/accuracies": 0.75, "rewards/chosen": -0.2253832221031189, "rewards/margins": 0.04378772899508476, "rewards/rejected": -0.26917096972465515, "step": 623 }, { "epoch": 1.7084188911704312, "grad_norm": 4.644213676452637, "learning_rate": 9.145205479452054e-07, "log_odds_chosen": -0.11163648962974548, "log_odds_ratio": -0.8795976638793945, "logits/chosen": -0.6504278182983398, "logits/rejected": -0.5831124782562256, "logps/chosen": -3.2448580265045166, "logps/rejected": -3.1524152755737305, "loss": 1.3912, "nll_loss": 1.3032386302947998, "rewards/accuracies": 0.375, "rewards/chosen": -0.32448580861091614, "rewards/margins": -0.009244285523891449, "rewards/rejected": -0.3152415156364441, "step": 624 }, { "epoch": 1.7111567419575633, "grad_norm": 4.239314079284668, "learning_rate": 9.143835616438355e-07, "log_odds_chosen": -0.7086701393127441, "log_odds_ratio": -1.3550200462341309, "logits/chosen": -0.7677249908447266, "logits/rejected": -0.6588382720947266, "logps/chosen": -2.8907155990600586, "logps/rejected": -2.1902527809143066, "loss": 1.5064, "nll_loss": 1.3708953857421875, "rewards/accuracies": 0.5, "rewards/chosen": -0.28907155990600586, "rewards/margins": -0.07004625350236893, "rewards/rejected": -0.21902529895305634, "step": 625 }, { "epoch": 1.7138945927446954, "grad_norm": 2.8054006099700928, "learning_rate": 9.142465753424657e-07, "log_odds_chosen": 0.6664063334465027, "log_odds_ratio": -0.47918999195098877, "logits/chosen": -0.6527297496795654, "logits/rejected": -0.8215524554252625, "logps/chosen": -1.5910143852233887, "logps/rejected": -2.0871806144714355, "loss": 1.3898, "nll_loss": 1.341892957687378, "rewards/accuracies": 0.875, "rewards/chosen": -0.1591014415025711, "rewards/margins": 0.049616601318120956, "rewards/rejected": -0.20871806144714355, "step": 626 }, { "epoch": 1.7166324435318274, "grad_norm": 4.259734153747559, "learning_rate": 9.141095890410958e-07, "log_odds_chosen": -0.6990475654602051, "log_odds_ratio": -1.2407000064849854, "logits/chosen": -0.6968227624893188, "logits/rejected": -0.7104403972625732, "logps/chosen": -2.718717098236084, "logps/rejected": -2.066833019256592, "loss": 1.5445, "nll_loss": 1.4204202890396118, "rewards/accuracies": 0.25, "rewards/chosen": -0.27187174558639526, "rewards/margins": -0.06518843024969101, "rewards/rejected": -0.20668330788612366, "step": 627 }, { "epoch": 1.7193702943189595, "grad_norm": 3.895174503326416, "learning_rate": 9.13972602739726e-07, "log_odds_chosen": -0.5782614946365356, "log_odds_ratio": -1.2399240732192993, "logits/chosen": -0.5714415907859802, "logits/rejected": -0.5644890666007996, "logps/chosen": -3.197763681411743, "logps/rejected": -2.6134390830993652, "loss": 1.5408, "nll_loss": 1.4168410301208496, "rewards/accuracies": 0.5, "rewards/chosen": -0.31977635622024536, "rewards/margins": -0.0584324449300766, "rewards/rejected": -0.26134392619132996, "step": 628 }, { "epoch": 1.7221081451060916, "grad_norm": 3.707240343093872, "learning_rate": 9.138356164383561e-07, "log_odds_chosen": -0.39495229721069336, "log_odds_ratio": -0.959400475025177, "logits/chosen": -0.5529353022575378, "logits/rejected": -0.6289621591567993, "logps/chosen": -2.785360336303711, "logps/rejected": -2.39640736579895, "loss": 1.5194, "nll_loss": 1.4234915971755981, "rewards/accuracies": 0.375, "rewards/chosen": -0.27853602170944214, "rewards/margins": -0.03889530152082443, "rewards/rejected": -0.2396407425403595, "step": 629 }, { "epoch": 1.7248459958932238, "grad_norm": 3.4549691677093506, "learning_rate": 9.136986301369862e-07, "log_odds_chosen": 0.1838138997554779, "log_odds_ratio": -0.7653752565383911, "logits/chosen": -0.5114314556121826, "logits/rejected": -0.582766056060791, "logps/chosen": -2.609245777130127, "logps/rejected": -2.751941204071045, "loss": 1.448, "nll_loss": 1.3714733123779297, "rewards/accuracies": 0.75, "rewards/chosen": -0.2609245777130127, "rewards/margins": 0.014269538223743439, "rewards/rejected": -0.2751941382884979, "step": 630 }, { "epoch": 1.727583846680356, "grad_norm": 4.04160213470459, "learning_rate": 9.135616438356164e-07, "log_odds_chosen": 0.06949751079082489, "log_odds_ratio": -1.1624256372451782, "logits/chosen": -0.5864495635032654, "logits/rejected": -0.5669856667518616, "logps/chosen": -3.5578296184539795, "logps/rejected": -3.5794196128845215, "loss": 1.525, "nll_loss": 1.4087355136871338, "rewards/accuracies": 0.5, "rewards/chosen": -0.35578295588493347, "rewards/margins": 0.002158990129828453, "rewards/rejected": -0.35794195532798767, "step": 631 }, { "epoch": 1.730321697467488, "grad_norm": 3.5534212589263916, "learning_rate": 9.134246575342465e-07, "log_odds_chosen": 0.18419355154037476, "log_odds_ratio": -0.6287676095962524, "logits/chosen": -0.6561843752861023, "logits/rejected": -0.6520009636878967, "logps/chosen": -2.0174570083618164, "logps/rejected": -2.171391248703003, "loss": 1.363, "nll_loss": 1.3001364469528198, "rewards/accuracies": 0.75, "rewards/chosen": -0.20174571871757507, "rewards/margins": 0.015393429435789585, "rewards/rejected": -0.2171391248703003, "step": 632 }, { "epoch": 1.7330595482546203, "grad_norm": 3.1433112621307373, "learning_rate": 9.132876712328766e-07, "log_odds_chosen": -0.11827197670936584, "log_odds_ratio": -0.854607105255127, "logits/chosen": -0.6065737009048462, "logits/rejected": -0.6351750493049622, "logps/chosen": -2.243157386779785, "logps/rejected": -2.1054186820983887, "loss": 1.3735, "nll_loss": 1.2880313396453857, "rewards/accuracies": 0.5, "rewards/chosen": -0.22431574761867523, "rewards/margins": -0.013773881830275059, "rewards/rejected": -0.21054187417030334, "step": 633 }, { "epoch": 1.7357973990417523, "grad_norm": 3.9513442516326904, "learning_rate": 9.131506849315068e-07, "log_odds_chosen": -0.3275212049484253, "log_odds_ratio": -0.9586813449859619, "logits/chosen": -0.5909498929977417, "logits/rejected": -0.5518307685852051, "logps/chosen": -2.8642232418060303, "logps/rejected": -2.535435914993286, "loss": 1.4187, "nll_loss": 1.322826862335205, "rewards/accuracies": 0.25, "rewards/chosen": -0.2864223122596741, "rewards/margins": -0.032878708094358444, "rewards/rejected": -0.2535436153411865, "step": 634 }, { "epoch": 1.7385352498288844, "grad_norm": 4.731657028198242, "learning_rate": 9.13013698630137e-07, "log_odds_chosen": -0.1435621976852417, "log_odds_ratio": -0.9679070711135864, "logits/chosen": -0.5787023305892944, "logits/rejected": -0.5365256071090698, "logps/chosen": -2.9736554622650146, "logps/rejected": -2.8210301399230957, "loss": 1.4822, "nll_loss": 1.3854553699493408, "rewards/accuracies": 0.25, "rewards/chosen": -0.29736554622650146, "rewards/margins": -0.01526254415512085, "rewards/rejected": -0.2821030020713806, "step": 635 }, { "epoch": 1.7412731006160165, "grad_norm": 3.22153377532959, "learning_rate": 9.12876712328767e-07, "log_odds_chosen": 0.4571823179721832, "log_odds_ratio": -0.5661544799804688, "logits/chosen": -0.5662775039672852, "logits/rejected": -0.6137698888778687, "logps/chosen": -2.1532626152038574, "logps/rejected": -2.5647079944610596, "loss": 1.4158, "nll_loss": 1.359170913696289, "rewards/accuracies": 0.75, "rewards/chosen": -0.2153262495994568, "rewards/margins": 0.04114455729722977, "rewards/rejected": -0.25647079944610596, "step": 636 }, { "epoch": 1.7440109514031485, "grad_norm": 3.7604455947875977, "learning_rate": 9.127397260273972e-07, "log_odds_chosen": 1.0463430881500244, "log_odds_ratio": -0.644607663154602, "logits/chosen": -0.6072409749031067, "logits/rejected": -0.6393887996673584, "logps/chosen": -2.101487398147583, "logps/rejected": -3.0469560623168945, "loss": 1.3063, "nll_loss": 1.2418363094329834, "rewards/accuracies": 0.625, "rewards/chosen": -0.21014875173568726, "rewards/margins": 0.09454686939716339, "rewards/rejected": -0.30469560623168945, "step": 637 }, { "epoch": 1.7467488021902806, "grad_norm": 3.4504475593566895, "learning_rate": 9.126027397260273e-07, "log_odds_chosen": -0.057057544589042664, "log_odds_ratio": -0.9114596843719482, "logits/chosen": -0.6898106336593628, "logits/rejected": -0.6679890155792236, "logps/chosen": -2.3470089435577393, "logps/rejected": -2.295536994934082, "loss": 1.5215, "nll_loss": 1.4303841590881348, "rewards/accuracies": 0.75, "rewards/chosen": -0.23470090329647064, "rewards/margins": -0.005147211253643036, "rewards/rejected": -0.2295536994934082, "step": 638 }, { "epoch": 1.7494866529774127, "grad_norm": 4.106216907501221, "learning_rate": 9.124657534246574e-07, "log_odds_chosen": 0.4556281566619873, "log_odds_ratio": -0.6040598154067993, "logits/chosen": -0.5264626741409302, "logits/rejected": -0.4914592206478119, "logps/chosen": -2.8556647300720215, "logps/rejected": -3.277458667755127, "loss": 1.3851, "nll_loss": 1.324735403060913, "rewards/accuracies": 0.625, "rewards/chosen": -0.28556644916534424, "rewards/margins": 0.04217939078807831, "rewards/rejected": -0.32774585485458374, "step": 639 }, { "epoch": 1.7522245037645447, "grad_norm": 3.4155383110046387, "learning_rate": 9.123287671232876e-07, "log_odds_chosen": 0.5491211414337158, "log_odds_ratio": -0.9722954630851746, "logits/chosen": -0.6372462511062622, "logits/rejected": -0.634115993976593, "logps/chosen": -2.39841628074646, "logps/rejected": -2.859254837036133, "loss": 1.4309, "nll_loss": 1.333708643913269, "rewards/accuracies": 0.625, "rewards/chosen": -0.23984161019325256, "rewards/margins": 0.04608386009931564, "rewards/rejected": -0.2859255075454712, "step": 640 }, { "epoch": 1.7549623545516768, "grad_norm": 4.672419548034668, "learning_rate": 9.121917808219177e-07, "log_odds_chosen": -1.2936080694198608, "log_odds_ratio": -1.726144552230835, "logits/chosen": -0.6333926320075989, "logits/rejected": -0.6444054245948792, "logps/chosen": -3.7134289741516113, "logps/rejected": -2.430441379547119, "loss": 1.536, "nll_loss": 1.363369345664978, "rewards/accuracies": 0.375, "rewards/chosen": -0.37134286761283875, "rewards/margins": -0.12829874455928802, "rewards/rejected": -0.24304413795471191, "step": 641 }, { "epoch": 1.7577002053388089, "grad_norm": 3.583390474319458, "learning_rate": 9.120547945205478e-07, "log_odds_chosen": -0.11213263869285583, "log_odds_ratio": -1.154787302017212, "logits/chosen": -0.5153898596763611, "logits/rejected": -0.5835528373718262, "logps/chosen": -3.0309293270111084, "logps/rejected": -2.878427743911743, "loss": 1.5094, "nll_loss": 1.3939177989959717, "rewards/accuracies": 0.75, "rewards/chosen": -0.30309295654296875, "rewards/margins": -0.015250151976943016, "rewards/rejected": -0.2878427803516388, "step": 642 }, { "epoch": 1.7604380561259412, "grad_norm": 3.7641754150390625, "learning_rate": 9.11917808219178e-07, "log_odds_chosen": 0.032734423875808716, "log_odds_ratio": -0.803098201751709, "logits/chosen": -0.5169633626937866, "logits/rejected": -0.5605133771896362, "logps/chosen": -2.6120052337646484, "logps/rejected": -2.602065086364746, "loss": 1.362, "nll_loss": 1.2817366123199463, "rewards/accuracies": 0.75, "rewards/chosen": -0.26120054721832275, "rewards/margins": -0.0009940266609191895, "rewards/rejected": -0.26020652055740356, "step": 643 }, { "epoch": 1.7631759069130732, "grad_norm": 3.1244704723358154, "learning_rate": 9.117808219178082e-07, "log_odds_chosen": 0.4102553129196167, "log_odds_ratio": -0.6321101784706116, "logits/chosen": -0.5725901126861572, "logits/rejected": -0.6655693650245667, "logps/chosen": -2.0102527141571045, "logps/rejected": -2.346550941467285, "loss": 1.4843, "nll_loss": 1.4210987091064453, "rewards/accuracies": 0.625, "rewards/chosen": -0.20102527737617493, "rewards/margins": 0.03362984210252762, "rewards/rejected": -0.23465511202812195, "step": 644 }, { "epoch": 1.7659137577002053, "grad_norm": 3.1582560539245605, "learning_rate": 9.116438356164384e-07, "log_odds_chosen": 0.16015000641345978, "log_odds_ratio": -0.6624826192855835, "logits/chosen": -0.6263144016265869, "logits/rejected": -0.7075644135475159, "logps/chosen": -1.9089173078536987, "logps/rejected": -2.026702880859375, "loss": 1.4638, "nll_loss": 1.3975543975830078, "rewards/accuracies": 0.75, "rewards/chosen": -0.19089174270629883, "rewards/margins": 0.011778555810451508, "rewards/rejected": -0.20267030596733093, "step": 645 }, { "epoch": 1.7686516084873376, "grad_norm": 3.8910071849823, "learning_rate": 9.115068493150685e-07, "log_odds_chosen": -0.4278303384780884, "log_odds_ratio": -1.0418068170547485, "logits/chosen": -0.6229652166366577, "logits/rejected": -0.6260428428649902, "logps/chosen": -2.7971272468566895, "logps/rejected": -2.3696160316467285, "loss": 1.4254, "nll_loss": 1.3212169408798218, "rewards/accuracies": 0.375, "rewards/chosen": -0.2797127068042755, "rewards/margins": -0.04275112971663475, "rewards/rejected": -0.23696158826351166, "step": 646 }, { "epoch": 1.7713894592744697, "grad_norm": 3.8203752040863037, "learning_rate": 9.113698630136986e-07, "log_odds_chosen": 0.11037562042474747, "log_odds_ratio": -0.7659599184989929, "logits/chosen": -0.6067891120910645, "logits/rejected": -0.6911497712135315, "logps/chosen": -2.291125774383545, "logps/rejected": -2.3621740341186523, "loss": 1.4031, "nll_loss": 1.3265397548675537, "rewards/accuracies": 0.75, "rewards/chosen": -0.22911256551742554, "rewards/margins": 0.007104840129613876, "rewards/rejected": -0.2362174093723297, "step": 647 }, { "epoch": 1.7741273100616017, "grad_norm": 4.4482831954956055, "learning_rate": 9.112328767123288e-07, "log_odds_chosen": -0.4324829578399658, "log_odds_ratio": -0.9925962686538696, "logits/chosen": -0.5018641948699951, "logits/rejected": -0.43763917684555054, "logps/chosen": -3.5895824432373047, "logps/rejected": -3.155261516571045, "loss": 1.3616, "nll_loss": 1.2623790502548218, "rewards/accuracies": 0.25, "rewards/chosen": -0.3589582145214081, "rewards/margins": -0.0434320792555809, "rewards/rejected": -0.31552615761756897, "step": 648 }, { "epoch": 1.7768651608487338, "grad_norm": 3.137188196182251, "learning_rate": 9.11095890410959e-07, "log_odds_chosen": 0.41385531425476074, "log_odds_ratio": -0.5439070463180542, "logits/chosen": -0.5149348378181458, "logits/rejected": -0.5740922093391418, "logps/chosen": -1.8547163009643555, "logps/rejected": -2.2000601291656494, "loss": 1.2963, "nll_loss": 1.2419097423553467, "rewards/accuracies": 0.75, "rewards/chosen": -0.18547163903713226, "rewards/margins": 0.03453437238931656, "rewards/rejected": -0.22000601887702942, "step": 649 }, { "epoch": 1.7796030116358659, "grad_norm": 3.33481502532959, "learning_rate": 9.10958904109589e-07, "log_odds_chosen": 1.2137740850448608, "log_odds_ratio": -0.3778538107872009, "logits/chosen": -0.632524311542511, "logits/rejected": -0.6582404971122742, "logps/chosen": -2.316256046295166, "logps/rejected": -3.419646978378296, "loss": 1.4064, "nll_loss": 1.3686394691467285, "rewards/accuracies": 0.875, "rewards/chosen": -0.23162560164928436, "rewards/margins": 0.11033909767866135, "rewards/rejected": -0.3419646918773651, "step": 650 }, { "epoch": 1.782340862422998, "grad_norm": 3.9347176551818848, "learning_rate": 9.108219178082192e-07, "log_odds_chosen": -0.7022277116775513, "log_odds_ratio": -1.2001352310180664, "logits/chosen": -0.5322099924087524, "logits/rejected": -0.5237811803817749, "logps/chosen": -3.1821322441101074, "logps/rejected": -2.4716970920562744, "loss": 1.4912, "nll_loss": 1.371152400970459, "rewards/accuracies": 0.375, "rewards/chosen": -0.31821325421333313, "rewards/margins": -0.07104352116584778, "rewards/rejected": -0.24716971814632416, "step": 651 }, { "epoch": 1.78507871321013, "grad_norm": 3.4751458168029785, "learning_rate": 9.106849315068493e-07, "log_odds_chosen": 0.3517645001411438, "log_odds_ratio": -0.6760157346725464, "logits/chosen": -0.6073719263076782, "logits/rejected": -0.6235400438308716, "logps/chosen": -2.461054563522339, "logps/rejected": -2.706247329711914, "loss": 1.3953, "nll_loss": 1.3277201652526855, "rewards/accuracies": 0.5, "rewards/chosen": -0.24610546231269836, "rewards/margins": 0.02451927587389946, "rewards/rejected": -0.27062472701072693, "step": 652 }, { "epoch": 1.787816563997262, "grad_norm": 3.24894380569458, "learning_rate": 9.105479452054794e-07, "log_odds_chosen": 0.4467800557613373, "log_odds_ratio": -0.6789511442184448, "logits/chosen": -0.6321710348129272, "logits/rejected": -0.6453995704650879, "logps/chosen": -2.2450509071350098, "logps/rejected": -2.6391501426696777, "loss": 1.3921, "nll_loss": 1.3242104053497314, "rewards/accuracies": 0.375, "rewards/chosen": -0.22450509667396545, "rewards/margins": 0.03940989822149277, "rewards/rejected": -0.2639150023460388, "step": 653 }, { "epoch": 1.7905544147843941, "grad_norm": 4.515866756439209, "learning_rate": 9.104109589041096e-07, "log_odds_chosen": -0.4895222783088684, "log_odds_ratio": -1.17719566822052, "logits/chosen": -0.4779585897922516, "logits/rejected": -0.4589422345161438, "logps/chosen": -2.7572643756866455, "logps/rejected": -2.279005765914917, "loss": 1.444, "nll_loss": 1.3262543678283691, "rewards/accuracies": 0.75, "rewards/chosen": -0.27572643756866455, "rewards/margins": -0.04782584309577942, "rewards/rejected": -0.22790059447288513, "step": 654 }, { "epoch": 1.7932922655715262, "grad_norm": 3.6247782707214355, "learning_rate": 9.102739726027397e-07, "log_odds_chosen": -0.3026524782180786, "log_odds_ratio": -0.9471042156219482, "logits/chosen": -0.6339415311813354, "logits/rejected": -0.6133451461791992, "logps/chosen": -2.7316761016845703, "logps/rejected": -2.4436631202697754, "loss": 1.4241, "nll_loss": 1.3293955326080322, "rewards/accuracies": 0.375, "rewards/chosen": -0.27316761016845703, "rewards/margins": -0.028801292181015015, "rewards/rejected": -0.24436631798744202, "step": 655 }, { "epoch": 1.7960301163586585, "grad_norm": 3.197453260421753, "learning_rate": 9.101369863013698e-07, "log_odds_chosen": 0.226730614900589, "log_odds_ratio": -0.684840977191925, "logits/chosen": -0.6221047639846802, "logits/rejected": -0.701923131942749, "logps/chosen": -2.5476038455963135, "logps/rejected": -2.7192723751068115, "loss": 1.4961, "nll_loss": 1.427635908126831, "rewards/accuracies": 0.625, "rewards/chosen": -0.25476038455963135, "rewards/margins": 0.0171668641269207, "rewards/rejected": -0.27192723751068115, "step": 656 }, { "epoch": 1.7987679671457906, "grad_norm": 3.976059913635254, "learning_rate": 9.1e-07, "log_odds_chosen": -0.2771559953689575, "log_odds_ratio": -1.012890338897705, "logits/chosen": -0.6508346199989319, "logits/rejected": -0.6212005615234375, "logps/chosen": -3.01701021194458, "logps/rejected": -2.7208664417266846, "loss": 1.412, "nll_loss": 1.310755968093872, "rewards/accuracies": 0.5, "rewards/chosen": -0.30170103907585144, "rewards/margins": -0.029614364728331566, "rewards/rejected": -0.27208665013313293, "step": 657 }, { "epoch": 1.8015058179329226, "grad_norm": 3.566107749938965, "learning_rate": 9.098630136986301e-07, "log_odds_chosen": 0.5056416988372803, "log_odds_ratio": -0.5828487873077393, "logits/chosen": -0.5858663320541382, "logits/rejected": -0.5976414680480957, "logps/chosen": -2.4102206230163574, "logps/rejected": -2.8517942428588867, "loss": 1.3364, "nll_loss": 1.2781392335891724, "rewards/accuracies": 0.875, "rewards/chosen": -0.24102208018302917, "rewards/margins": 0.04415734484791756, "rewards/rejected": -0.2851794362068176, "step": 658 }, { "epoch": 1.8042436687200547, "grad_norm": 4.030274868011475, "learning_rate": 9.097260273972603e-07, "log_odds_chosen": 0.6402657628059387, "log_odds_ratio": -0.4804302453994751, "logits/chosen": -0.4681442081928253, "logits/rejected": -0.4747370481491089, "logps/chosen": -3.2502048015594482, "logps/rejected": -3.818521022796631, "loss": 1.333, "nll_loss": 1.2849421501159668, "rewards/accuracies": 0.875, "rewards/chosen": -0.3250204920768738, "rewards/margins": 0.05683162435889244, "rewards/rejected": -0.3818521201610565, "step": 659 }, { "epoch": 1.806981519507187, "grad_norm": 4.051973342895508, "learning_rate": 9.095890410958904e-07, "log_odds_chosen": -0.3960878551006317, "log_odds_ratio": -1.1620521545410156, "logits/chosen": -0.5365023612976074, "logits/rejected": -0.4787779152393341, "logps/chosen": -3.037567138671875, "logps/rejected": -2.6252007484436035, "loss": 1.5056, "nll_loss": 1.3893826007843018, "rewards/accuracies": 0.375, "rewards/chosen": -0.3037567436695099, "rewards/margins": -0.04123665392398834, "rewards/rejected": -0.26252010464668274, "step": 660 }, { "epoch": 1.809719370294319, "grad_norm": 3.3171019554138184, "learning_rate": 9.094520547945205e-07, "log_odds_chosen": 0.20636257529258728, "log_odds_ratio": -0.7681694626808167, "logits/chosen": -0.4700889587402344, "logits/rejected": -0.5084631443023682, "logps/chosen": -2.549612522125244, "logps/rejected": -2.68106746673584, "loss": 1.4091, "nll_loss": 1.3322635889053345, "rewards/accuracies": 0.625, "rewards/chosen": -0.2549612522125244, "rewards/margins": 0.013145482167601585, "rewards/rejected": -0.26810675859451294, "step": 661 }, { "epoch": 1.8124572210814511, "grad_norm": 3.51228666305542, "learning_rate": 9.093150684931507e-07, "log_odds_chosen": 0.31733524799346924, "log_odds_ratio": -0.740580677986145, "logits/chosen": -0.570685625076294, "logits/rejected": -0.6129416227340698, "logps/chosen": -2.9166970252990723, "logps/rejected": -3.120121479034424, "loss": 1.4141, "nll_loss": 1.3400111198425293, "rewards/accuracies": 0.625, "rewards/chosen": -0.29166969656944275, "rewards/margins": 0.020342443138360977, "rewards/rejected": -0.31201210618019104, "step": 662 }, { "epoch": 1.8151950718685832, "grad_norm": 3.3660335540771484, "learning_rate": 9.091780821917808e-07, "log_odds_chosen": 0.41198045015335083, "log_odds_ratio": -0.6468285322189331, "logits/chosen": -0.5667423605918884, "logits/rejected": -0.6023590564727783, "logps/chosen": -2.9463162422180176, "logps/rejected": -3.2925643920898438, "loss": 1.4353, "nll_loss": 1.37057363986969, "rewards/accuracies": 0.75, "rewards/chosen": -0.29463163018226624, "rewards/margins": 0.034624792635440826, "rewards/rejected": -0.32925644516944885, "step": 663 }, { "epoch": 1.8179329226557153, "grad_norm": 3.6983563899993896, "learning_rate": 9.090410958904109e-07, "log_odds_chosen": 0.6613391637802124, "log_odds_ratio": -0.7157090306282043, "logits/chosen": -0.475394070148468, "logits/rejected": -0.520538330078125, "logps/chosen": -2.541721820831299, "logps/rejected": -3.0381057262420654, "loss": 1.3497, "nll_loss": 1.2781418561935425, "rewards/accuracies": 0.625, "rewards/chosen": -0.2541721761226654, "rewards/margins": 0.049638405442237854, "rewards/rejected": -0.30381059646606445, "step": 664 }, { "epoch": 1.8206707734428473, "grad_norm": 3.4974453449249268, "learning_rate": 9.089041095890411e-07, "log_odds_chosen": 0.4505589008331299, "log_odds_ratio": -0.6529409885406494, "logits/chosen": -0.4429040551185608, "logits/rejected": -0.4486869275569916, "logps/chosen": -2.5110912322998047, "logps/rejected": -2.8893730640411377, "loss": 1.4659, "nll_loss": 1.400633692741394, "rewards/accuracies": 0.625, "rewards/chosen": -0.25110912322998047, "rewards/margins": 0.037828199565410614, "rewards/rejected": -0.2889373004436493, "step": 665 }, { "epoch": 1.8234086242299794, "grad_norm": 2.983185052871704, "learning_rate": 9.087671232876713e-07, "log_odds_chosen": 0.5483094453811646, "log_odds_ratio": -0.5684313774108887, "logits/chosen": -0.5051244497299194, "logits/rejected": -0.521244466304779, "logps/chosen": -1.8408710956573486, "logps/rejected": -2.261918067932129, "loss": 1.3201, "nll_loss": 1.2632373571395874, "rewards/accuracies": 0.625, "rewards/chosen": -0.1840871274471283, "rewards/margins": 0.04210469499230385, "rewards/rejected": -0.22619181871414185, "step": 666 }, { "epoch": 1.8261464750171115, "grad_norm": 4.746553897857666, "learning_rate": 9.086301369863013e-07, "log_odds_chosen": -1.571583867073059, "log_odds_ratio": -1.9048539400100708, "logits/chosen": -0.5638530850410461, "logits/rejected": -0.49245837330818176, "logps/chosen": -4.092933654785156, "logps/rejected": -2.5599558353424072, "loss": 1.4833, "nll_loss": 1.2928086519241333, "rewards/accuracies": 0.25, "rewards/chosen": -0.40929335355758667, "rewards/margins": -0.15329775214195251, "rewards/rejected": -0.25599557161331177, "step": 667 }, { "epoch": 1.8288843258042435, "grad_norm": 2.9420228004455566, "learning_rate": 9.084931506849315e-07, "log_odds_chosen": 0.4688592255115509, "log_odds_ratio": -0.5294506549835205, "logits/chosen": -0.5871253609657288, "logits/rejected": -0.5713080167770386, "logps/chosen": -1.7344574928283691, "logps/rejected": -2.109886407852173, "loss": 1.319, "nll_loss": 1.2660847902297974, "rewards/accuracies": 0.875, "rewards/chosen": -0.17344573140144348, "rewards/margins": 0.03754289075732231, "rewards/rejected": -0.21098864078521729, "step": 668 }, { "epoch": 1.8316221765913756, "grad_norm": 3.368323564529419, "learning_rate": 9.083561643835616e-07, "log_odds_chosen": 1.010154366493225, "log_odds_ratio": -0.5231979489326477, "logits/chosen": -0.4634869396686554, "logits/rejected": -0.5089804530143738, "logps/chosen": -2.0900278091430664, "logps/rejected": -3.0205001831054688, "loss": 1.3562, "nll_loss": 1.3039042949676514, "rewards/accuracies": 0.75, "rewards/chosen": -0.2090027779340744, "rewards/margins": 0.09304723888635635, "rewards/rejected": -0.30205002427101135, "step": 669 }, { "epoch": 1.834360027378508, "grad_norm": 3.5625596046447754, "learning_rate": 9.082191780821917e-07, "log_odds_chosen": -0.7549686431884766, "log_odds_ratio": -1.3461217880249023, "logits/chosen": -0.5847073793411255, "logits/rejected": -0.6477680206298828, "logps/chosen": -3.0854978561401367, "logps/rejected": -2.316183090209961, "loss": 1.4133, "nll_loss": 1.2786664962768555, "rewards/accuracies": 0.5, "rewards/chosen": -0.30854982137680054, "rewards/margins": -0.07693149894475937, "rewards/rejected": -0.23161831498146057, "step": 670 }, { "epoch": 1.83709787816564, "grad_norm": 4.900373458862305, "learning_rate": 9.080821917808219e-07, "log_odds_chosen": 0.08058647811412811, "log_odds_ratio": -0.8146313428878784, "logits/chosen": -0.39445990324020386, "logits/rejected": -0.3831467628479004, "logps/chosen": -2.9405083656311035, "logps/rejected": -2.9590039253234863, "loss": 1.3765, "nll_loss": 1.2950669527053833, "rewards/accuracies": 0.5, "rewards/chosen": -0.29405084252357483, "rewards/margins": 0.0018495554104447365, "rewards/rejected": -0.2959004044532776, "step": 671 }, { "epoch": 1.839835728952772, "grad_norm": 3.3360586166381836, "learning_rate": 9.07945205479452e-07, "log_odds_chosen": 0.4255877137184143, "log_odds_ratio": -0.7316431403160095, "logits/chosen": -0.3993760347366333, "logits/rejected": -0.4553118944168091, "logps/chosen": -2.3634750843048096, "logps/rejected": -2.69478178024292, "loss": 1.3807, "nll_loss": 1.3075392246246338, "rewards/accuracies": 0.75, "rewards/chosen": -0.236347496509552, "rewards/margins": 0.03313068673014641, "rewards/rejected": -0.2694782018661499, "step": 672 }, { "epoch": 1.8425735797399043, "grad_norm": 3.5296080112457275, "learning_rate": 9.078082191780822e-07, "log_odds_chosen": -0.08370143920183182, "log_odds_ratio": -0.7811952829360962, "logits/chosen": -0.4459123909473419, "logits/rejected": -0.4283888041973114, "logps/chosen": -2.541370153427124, "logps/rejected": -2.4572722911834717, "loss": 1.423, "nll_loss": 1.3448859453201294, "rewards/accuracies": 0.375, "rewards/chosen": -0.2541370391845703, "rewards/margins": -0.008409783244132996, "rewards/rejected": -0.24572724103927612, "step": 673 }, { "epoch": 1.8453114305270364, "grad_norm": 3.321152448654175, "learning_rate": 9.076712328767123e-07, "log_odds_chosen": -0.09274119883775711, "log_odds_ratio": -0.8011950254440308, "logits/chosen": -0.5376520156860352, "logits/rejected": -0.49869251251220703, "logps/chosen": -2.2571780681610107, "logps/rejected": -2.1717770099639893, "loss": 1.3666, "nll_loss": 1.2865216732025146, "rewards/accuracies": 0.375, "rewards/chosen": -0.22571781277656555, "rewards/margins": -0.008540092036128044, "rewards/rejected": -0.21717771887779236, "step": 674 }, { "epoch": 1.8480492813141685, "grad_norm": 3.4708189964294434, "learning_rate": 9.075342465753424e-07, "log_odds_chosen": -0.2473965436220169, "log_odds_ratio": -1.0204371213912964, "logits/chosen": -0.6005933284759521, "logits/rejected": -0.59515780210495, "logps/chosen": -2.8977928161621094, "logps/rejected": -2.631960868835449, "loss": 1.3986, "nll_loss": 1.2965681552886963, "rewards/accuracies": 0.5, "rewards/chosen": -0.28977930545806885, "rewards/margins": -0.02658318728208542, "rewards/rejected": -0.26319611072540283, "step": 675 }, { "epoch": 1.8507871321013005, "grad_norm": 3.539891481399536, "learning_rate": 9.073972602739726e-07, "log_odds_chosen": -0.31218770146369934, "log_odds_ratio": -0.9474655985832214, "logits/chosen": -0.56847083568573, "logits/rejected": -0.5797737836837769, "logps/chosen": -3.12070894241333, "logps/rejected": -2.7977497577667236, "loss": 1.3363, "nll_loss": 1.2415847778320312, "rewards/accuracies": 0.5, "rewards/chosen": -0.31207090616226196, "rewards/margins": -0.032295919954776764, "rewards/rejected": -0.2797749936580658, "step": 676 }, { "epoch": 1.8535249828884326, "grad_norm": 3.5549213886260986, "learning_rate": 9.072602739726027e-07, "log_odds_chosen": 0.6032496690750122, "log_odds_ratio": -0.5136840343475342, "logits/chosen": -0.262185662984848, "logits/rejected": -0.28749269247055054, "logps/chosen": -2.1691348552703857, "logps/rejected": -2.660841464996338, "loss": 1.2889, "nll_loss": 1.237537145614624, "rewards/accuracies": 0.875, "rewards/chosen": -0.21691349148750305, "rewards/margins": 0.04917065054178238, "rewards/rejected": -0.26608413457870483, "step": 677 }, { "epoch": 1.8562628336755647, "grad_norm": 3.203569173812866, "learning_rate": 9.071232876712328e-07, "log_odds_chosen": 1.4580320119857788, "log_odds_ratio": -0.5121516585350037, "logits/chosen": -0.4356015920639038, "logits/rejected": -0.5056192874908447, "logps/chosen": -2.3714005947113037, "logps/rejected": -3.760253429412842, "loss": 1.3086, "nll_loss": 1.2573750019073486, "rewards/accuracies": 0.75, "rewards/chosen": -0.23714004456996918, "rewards/margins": 0.1388852894306183, "rewards/rejected": -0.37602531909942627, "step": 678 }, { "epoch": 1.8590006844626967, "grad_norm": 3.652794361114502, "learning_rate": 9.06986301369863e-07, "log_odds_chosen": 0.3690614700317383, "log_odds_ratio": -0.7142367362976074, "logits/chosen": -0.38956576585769653, "logits/rejected": -0.4695509672164917, "logps/chosen": -2.2381019592285156, "logps/rejected": -2.5284056663513184, "loss": 1.2593, "nll_loss": 1.1878581047058105, "rewards/accuracies": 0.625, "rewards/chosen": -0.22381019592285156, "rewards/margins": 0.029030362144112587, "rewards/rejected": -0.2528405487537384, "step": 679 }, { "epoch": 1.8617385352498288, "grad_norm": 3.440950393676758, "learning_rate": 9.068493150684932e-07, "log_odds_chosen": 0.5978532433509827, "log_odds_ratio": -0.6104034781455994, "logits/chosen": -0.4088445007801056, "logits/rejected": -0.4432867765426636, "logps/chosen": -2.1983323097229004, "logps/rejected": -2.7483327388763428, "loss": 1.3004, "nll_loss": 1.2393391132354736, "rewards/accuracies": 0.75, "rewards/chosen": -0.21983322501182556, "rewards/margins": 0.055000051856040955, "rewards/rejected": -0.2748332917690277, "step": 680 }, { "epoch": 1.8644763860369609, "grad_norm": 3.9661850929260254, "learning_rate": 9.067123287671232e-07, "log_odds_chosen": -0.2518830895423889, "log_odds_ratio": -1.0063409805297852, "logits/chosen": -0.376838743686676, "logits/rejected": -0.3438349664211273, "logps/chosen": -2.771587610244751, "logps/rejected": -2.5604751110076904, "loss": 1.2858, "nll_loss": 1.1851799488067627, "rewards/accuracies": 0.375, "rewards/chosen": -0.2771587669849396, "rewards/margins": -0.02111125737428665, "rewards/rejected": -0.2560475170612335, "step": 681 }, { "epoch": 1.867214236824093, "grad_norm": 5.103271961212158, "learning_rate": 9.065753424657534e-07, "log_odds_chosen": -0.78275465965271, "log_odds_ratio": -1.499577283859253, "logits/chosen": -0.45928841829299927, "logits/rejected": -0.49748674035072327, "logps/chosen": -3.296388626098633, "logps/rejected": -2.4973514080047607, "loss": 1.4139, "nll_loss": 1.2639541625976562, "rewards/accuracies": 0.625, "rewards/chosen": -0.32963889837265015, "rewards/margins": -0.0799037367105484, "rewards/rejected": -0.24973514676094055, "step": 682 }, { "epoch": 1.8699520876112252, "grad_norm": 3.6243789196014404, "learning_rate": 9.064383561643835e-07, "log_odds_chosen": 0.01714468002319336, "log_odds_ratio": -0.7694122791290283, "logits/chosen": -0.519487738609314, "logits/rejected": -0.5463556051254272, "logps/chosen": -2.4663290977478027, "logps/rejected": -2.440303325653076, "loss": 1.372, "nll_loss": 1.295014500617981, "rewards/accuracies": 0.5, "rewards/chosen": -0.246632918715477, "rewards/margins": -0.00260258000344038, "rewards/rejected": -0.24403032660484314, "step": 683 }, { "epoch": 1.8726899383983573, "grad_norm": 4.251916885375977, "learning_rate": 9.063013698630136e-07, "log_odds_chosen": -0.46754172444343567, "log_odds_ratio": -1.0988516807556152, "logits/chosen": -0.440014511346817, "logits/rejected": -0.35874539613723755, "logps/chosen": -3.148494243621826, "logps/rejected": -2.698021411895752, "loss": 1.3436, "nll_loss": 1.2336704730987549, "rewards/accuracies": 0.25, "rewards/chosen": -0.3148494362831116, "rewards/margins": -0.04504731297492981, "rewards/rejected": -0.26980212330818176, "step": 684 }, { "epoch": 1.8754277891854894, "grad_norm": 4.608383655548096, "learning_rate": 9.061643835616438e-07, "log_odds_chosen": -0.9148302674293518, "log_odds_ratio": -1.4798810482025146, "logits/chosen": -0.2844330668449402, "logits/rejected": -0.24768517911434174, "logps/chosen": -3.39292311668396, "logps/rejected": -2.4838485717773438, "loss": 1.4775, "nll_loss": 1.3294652700424194, "rewards/accuracies": 0.375, "rewards/chosen": -0.33929234743118286, "rewards/margins": -0.09090747684240341, "rewards/rejected": -0.24838486313819885, "step": 685 }, { "epoch": 1.8781656399726216, "grad_norm": 3.064891815185547, "learning_rate": 9.060273972602739e-07, "log_odds_chosen": -0.027147412300109863, "log_odds_ratio": -0.7500839233398438, "logits/chosen": -0.5753992199897766, "logits/rejected": -0.6147300601005554, "logps/chosen": -2.355851173400879, "logps/rejected": -2.306635856628418, "loss": 1.3424, "nll_loss": 1.2673442363739014, "rewards/accuracies": 0.5, "rewards/chosen": -0.23558512330055237, "rewards/margins": -0.004921548068523407, "rewards/rejected": -0.23066358268260956, "step": 686 }, { "epoch": 1.8809034907597537, "grad_norm": 3.4954991340637207, "learning_rate": 9.058904109589041e-07, "log_odds_chosen": 0.0863991528749466, "log_odds_ratio": -0.7145488858222961, "logits/chosen": -0.46255791187286377, "logits/rejected": -0.4755825400352478, "logps/chosen": -2.2542996406555176, "logps/rejected": -2.308316230773926, "loss": 1.2944, "nll_loss": 1.2229684591293335, "rewards/accuracies": 0.625, "rewards/chosen": -0.225429967045784, "rewards/margins": 0.00540167186409235, "rewards/rejected": -0.23083163797855377, "step": 687 }, { "epoch": 1.8836413415468858, "grad_norm": 3.3388843536376953, "learning_rate": 9.057534246575342e-07, "log_odds_chosen": 1.3842427730560303, "log_odds_ratio": -0.6679931879043579, "logits/chosen": -0.32940736413002014, "logits/rejected": -0.42988044023513794, "logps/chosen": -2.649280071258545, "logps/rejected": -3.9873557090759277, "loss": 1.3116, "nll_loss": 1.2448327541351318, "rewards/accuracies": 0.625, "rewards/chosen": -0.26492801308631897, "rewards/margins": 0.13380753993988037, "rewards/rejected": -0.39873558282852173, "step": 688 }, { "epoch": 1.8863791923340179, "grad_norm": 4.349141597747803, "learning_rate": 9.056164383561643e-07, "log_odds_chosen": -0.2277071177959442, "log_odds_ratio": -1.2332359552383423, "logits/chosen": -0.5090928673744202, "logits/rejected": -0.5142104625701904, "logps/chosen": -3.5437092781066895, "logps/rejected": -3.2130942344665527, "loss": 1.3823, "nll_loss": 1.2589666843414307, "rewards/accuracies": 0.375, "rewards/chosen": -0.35437095165252686, "rewards/margins": -0.033061470836400986, "rewards/rejected": -0.32130947709083557, "step": 689 }, { "epoch": 1.88911704312115, "grad_norm": 4.739871025085449, "learning_rate": 9.054794520547945e-07, "log_odds_chosen": 0.031730055809020996, "log_odds_ratio": -0.7689390778541565, "logits/chosen": -0.3679565191268921, "logits/rejected": -0.313154399394989, "logps/chosen": -3.2455947399139404, "logps/rejected": -3.2437491416931152, "loss": 1.2902, "nll_loss": 1.2133240699768066, "rewards/accuracies": 0.375, "rewards/chosen": -0.32455945014953613, "rewards/margins": -0.00018454715609550476, "rewards/rejected": -0.3243749141693115, "step": 690 }, { "epoch": 1.891854893908282, "grad_norm": 3.243879556655884, "learning_rate": 9.053424657534246e-07, "log_odds_chosen": 0.4816682040691376, "log_odds_ratio": -0.8734560608863831, "logits/chosen": -0.3625141680240631, "logits/rejected": -0.40789711475372314, "logps/chosen": -2.2195253372192383, "logps/rejected": -2.638895034790039, "loss": 1.3232, "nll_loss": 1.2358129024505615, "rewards/accuracies": 0.625, "rewards/chosen": -0.22195252776145935, "rewards/margins": 0.041937001049518585, "rewards/rejected": -0.26388952136039734, "step": 691 }, { "epoch": 1.894592744695414, "grad_norm": 4.308997631072998, "learning_rate": 9.052054794520547e-07, "log_odds_chosen": -0.47880104184150696, "log_odds_ratio": -1.094358205795288, "logits/chosen": -0.4548197388648987, "logits/rejected": -0.47195491194725037, "logps/chosen": -3.1217312812805176, "logps/rejected": -2.660536289215088, "loss": 1.4326, "nll_loss": 1.3231806755065918, "rewards/accuracies": 0.5, "rewards/chosen": -0.31217315793037415, "rewards/margins": -0.04611951857805252, "rewards/rejected": -0.2660536468029022, "step": 692 }, { "epoch": 1.8973305954825461, "grad_norm": 3.3978843688964844, "learning_rate": 9.050684931506849e-07, "log_odds_chosen": 0.18600907921791077, "log_odds_ratio": -0.6699503660202026, "logits/chosen": -0.4486679136753082, "logits/rejected": -0.5182151198387146, "logps/chosen": -2.5241198539733887, "logps/rejected": -2.659837484359741, "loss": 1.2714, "nll_loss": 1.2043647766113281, "rewards/accuracies": 0.75, "rewards/chosen": -0.25241199135780334, "rewards/margins": 0.013571765273809433, "rewards/rejected": -0.2659837603569031, "step": 693 }, { "epoch": 1.9000684462696782, "grad_norm": 2.7766950130462646, "learning_rate": 9.049315068493151e-07, "log_odds_chosen": 1.0796685218811035, "log_odds_ratio": -0.5694616436958313, "logits/chosen": -0.4186883568763733, "logits/rejected": -0.5291920900344849, "logps/chosen": -1.9755229949951172, "logps/rejected": -2.9684815406799316, "loss": 1.3115, "nll_loss": 1.2545064687728882, "rewards/accuracies": 0.75, "rewards/chosen": -0.19755229353904724, "rewards/margins": 0.09929586201906204, "rewards/rejected": -0.2968481481075287, "step": 694 }, { "epoch": 1.9028062970568103, "grad_norm": 4.1788482666015625, "learning_rate": 9.047945205479451e-07, "log_odds_chosen": -0.7271530032157898, "log_odds_ratio": -1.1915228366851807, "logits/chosen": -0.5928769111633301, "logits/rejected": -0.5166879892349243, "logps/chosen": -3.0072460174560547, "logps/rejected": -2.3225789070129395, "loss": 1.3469, "nll_loss": 1.227702260017395, "rewards/accuracies": 0.25, "rewards/chosen": -0.300724595785141, "rewards/margins": -0.06846672296524048, "rewards/rejected": -0.2322578728199005, "step": 695 }, { "epoch": 1.9055441478439425, "grad_norm": 3.6382012367248535, "learning_rate": 9.046575342465753e-07, "log_odds_chosen": -0.289877325296402, "log_odds_ratio": -0.9888838529586792, "logits/chosen": -0.49202942848205566, "logits/rejected": -0.48211243748664856, "logps/chosen": -3.108776092529297, "logps/rejected": -2.836763858795166, "loss": 1.4143, "nll_loss": 1.3153823614120483, "rewards/accuracies": 0.625, "rewards/chosen": -0.31087762117385864, "rewards/margins": -0.027201233431696892, "rewards/rejected": -0.2836763858795166, "step": 696 }, { "epoch": 1.9082819986310746, "grad_norm": 4.426705837249756, "learning_rate": 9.045205479452055e-07, "log_odds_chosen": -0.15038758516311646, "log_odds_ratio": -0.9323725700378418, "logits/chosen": -0.4877655506134033, "logits/rejected": -0.5299578905105591, "logps/chosen": -2.808990478515625, "logps/rejected": -2.624767303466797, "loss": 1.2818, "nll_loss": 1.1886048316955566, "rewards/accuracies": 0.625, "rewards/chosen": -0.2808990478515625, "rewards/margins": -0.01842230185866356, "rewards/rejected": -0.26247674226760864, "step": 697 }, { "epoch": 1.9110198494182067, "grad_norm": 3.062695026397705, "learning_rate": 9.043835616438355e-07, "log_odds_chosen": 0.43224412202835083, "log_odds_ratio": -0.6329264044761658, "logits/chosen": -0.37900805473327637, "logits/rejected": -0.4510905146598816, "logps/chosen": -2.3432652950286865, "logps/rejected": -2.714386463165283, "loss": 1.3122, "nll_loss": 1.2489326000213623, "rewards/accuracies": 0.625, "rewards/chosen": -0.2343265414237976, "rewards/margins": 0.037112124264240265, "rewards/rejected": -0.2714386582374573, "step": 698 }, { "epoch": 1.913757700205339, "grad_norm": 3.9448142051696777, "learning_rate": 9.042465753424657e-07, "log_odds_chosen": 0.47418129444122314, "log_odds_ratio": -0.8633025288581848, "logits/chosen": -0.48282086849212646, "logits/rejected": -0.48298871517181396, "logps/chosen": -2.856567621231079, "logps/rejected": -3.3179373741149902, "loss": 1.3754, "nll_loss": 1.2891011238098145, "rewards/accuracies": 0.375, "rewards/chosen": -0.28565678000450134, "rewards/margins": 0.046136967837810516, "rewards/rejected": -0.33179372549057007, "step": 699 }, { "epoch": 1.916495550992471, "grad_norm": 3.892887592315674, "learning_rate": 9.041095890410958e-07, "log_odds_chosen": -0.5324172377586365, "log_odds_ratio": -1.0627543926239014, "logits/chosen": -0.47261446714401245, "logits/rejected": -0.38063058257102966, "logps/chosen": -2.9726879596710205, "logps/rejected": -2.486952543258667, "loss": 1.339, "nll_loss": 1.2327215671539307, "rewards/accuracies": 0.25, "rewards/chosen": -0.2972687780857086, "rewards/margins": -0.048573534935712814, "rewards/rejected": -0.2486952543258667, "step": 700 }, { "epoch": 1.919233401779603, "grad_norm": 3.422020435333252, "learning_rate": 9.03972602739726e-07, "log_odds_chosen": 0.05053822696208954, "log_odds_ratio": -0.7357112169265747, "logits/chosen": -0.4033310115337372, "logits/rejected": -0.4460254907608032, "logps/chosen": -2.4864284992218018, "logps/rejected": -2.5125370025634766, "loss": 1.3913, "nll_loss": 1.3176848888397217, "rewards/accuracies": 0.5, "rewards/chosen": -0.24864284694194794, "rewards/margins": 0.002610865980386734, "rewards/rejected": -0.25125372409820557, "step": 701 }, { "epoch": 1.9219712525667352, "grad_norm": 3.158318519592285, "learning_rate": 9.038356164383561e-07, "log_odds_chosen": -0.8331902027130127, "log_odds_ratio": -1.4347177743911743, "logits/chosen": -0.5458742380142212, "logits/rejected": -0.5496008992195129, "logps/chosen": -2.76944637298584, "logps/rejected": -1.9328248500823975, "loss": 1.3677, "nll_loss": 1.2241847515106201, "rewards/accuracies": 0.5, "rewards/chosen": -0.276944637298584, "rewards/margins": -0.08366213738918304, "rewards/rejected": -0.19328249990940094, "step": 702 }, { "epoch": 1.9247091033538672, "grad_norm": 3.2448689937591553, "learning_rate": 9.036986301369862e-07, "log_odds_chosen": 0.3099938631057739, "log_odds_ratio": -0.6533499956130981, "logits/chosen": -0.3189927339553833, "logits/rejected": -0.3505736291408539, "logps/chosen": -2.4457101821899414, "logps/rejected": -2.7221148014068604, "loss": 1.2877, "nll_loss": 1.2223844528198242, "rewards/accuracies": 0.75, "rewards/chosen": -0.2445710301399231, "rewards/margins": 0.027640435844659805, "rewards/rejected": -0.2722114622592926, "step": 703 }, { "epoch": 1.9274469541409993, "grad_norm": 3.693089723587036, "learning_rate": 9.035616438356164e-07, "log_odds_chosen": 0.2659534513950348, "log_odds_ratio": -0.8743306994438171, "logits/chosen": -0.5014724135398865, "logits/rejected": -0.5670430660247803, "logps/chosen": -3.0717551708221436, "logps/rejected": -3.257345199584961, "loss": 1.3944, "nll_loss": 1.3069332838058472, "rewards/accuracies": 0.75, "rewards/chosen": -0.30717551708221436, "rewards/margins": 0.01855902560055256, "rewards/rejected": -0.32573455572128296, "step": 704 }, { "epoch": 1.9301848049281314, "grad_norm": 3.5255792140960693, "learning_rate": 9.034246575342465e-07, "log_odds_chosen": 0.0673714429140091, "log_odds_ratio": -0.7640794515609741, "logits/chosen": -0.3743203580379486, "logits/rejected": -0.3960377275943756, "logps/chosen": -2.1910200119018555, "logps/rejected": -2.1940088272094727, "loss": 1.2439, "nll_loss": 1.1675258874893188, "rewards/accuracies": 0.625, "rewards/chosen": -0.21910201013088226, "rewards/margins": 0.00029887259006500244, "rewards/rejected": -0.21940088272094727, "step": 705 }, { "epoch": 1.9329226557152634, "grad_norm": 3.6155524253845215, "learning_rate": 9.032876712328766e-07, "log_odds_chosen": 0.004201322793960571, "log_odds_ratio": -0.8900362849235535, "logits/chosen": -0.42586979269981384, "logits/rejected": -0.5083978772163391, "logps/chosen": -2.5756113529205322, "logps/rejected": -2.5348081588745117, "loss": 1.3732, "nll_loss": 1.284178376197815, "rewards/accuracies": 0.625, "rewards/chosen": -0.2575611472129822, "rewards/margins": -0.0040803514420986176, "rewards/rejected": -0.25348079204559326, "step": 706 }, { "epoch": 1.9356605065023955, "grad_norm": 3.7597782611846924, "learning_rate": 9.031506849315068e-07, "log_odds_chosen": 0.07288126647472382, "log_odds_ratio": -0.7769542932510376, "logits/chosen": -0.32901641726493835, "logits/rejected": -0.34547117352485657, "logps/chosen": -2.501953125, "logps/rejected": -2.5175552368164062, "loss": 1.2678, "nll_loss": 1.190138339996338, "rewards/accuracies": 0.75, "rewards/chosen": -0.25019532442092896, "rewards/margins": 0.001560186967253685, "rewards/rejected": -0.2517555058002472, "step": 707 }, { "epoch": 1.9383983572895276, "grad_norm": 3.175459861755371, "learning_rate": 9.03013698630137e-07, "log_odds_chosen": -0.3319739103317261, "log_odds_ratio": -1.010377049446106, "logits/chosen": -0.5060445070266724, "logits/rejected": -0.5106457471847534, "logps/chosen": -2.5078907012939453, "logps/rejected": -2.160186290740967, "loss": 1.326, "nll_loss": 1.2249866724014282, "rewards/accuracies": 0.5, "rewards/chosen": -0.250789076089859, "rewards/margins": -0.034770429134368896, "rewards/rejected": -0.2160186469554901, "step": 708 }, { "epoch": 1.9411362080766599, "grad_norm": 3.4847822189331055, "learning_rate": 9.02876712328767e-07, "log_odds_chosen": 0.2983318269252777, "log_odds_ratio": -0.7856838703155518, "logits/chosen": -0.3236534595489502, "logits/rejected": -0.30851876735687256, "logps/chosen": -2.550743818283081, "logps/rejected": -2.7959799766540527, "loss": 1.2728, "nll_loss": 1.1942658424377441, "rewards/accuracies": 0.5, "rewards/chosen": -0.2550743818283081, "rewards/margins": 0.024523617699742317, "rewards/rejected": -0.2795979976654053, "step": 709 }, { "epoch": 1.943874058863792, "grad_norm": 2.9693737030029297, "learning_rate": 9.027397260273972e-07, "log_odds_chosen": 0.44085052609443665, "log_odds_ratio": -0.5582904815673828, "logits/chosen": -0.34521809220314026, "logits/rejected": -0.3669300973415375, "logps/chosen": -2.1417505741119385, "logps/rejected": -2.5513827800750732, "loss": 1.3313, "nll_loss": 1.2754652500152588, "rewards/accuracies": 0.75, "rewards/chosen": -0.2141750603914261, "rewards/margins": 0.040963225066661835, "rewards/rejected": -0.2551382780075073, "step": 710 }, { "epoch": 1.946611909650924, "grad_norm": 2.7587339878082275, "learning_rate": 9.026027397260274e-07, "log_odds_chosen": 0.14129117131233215, "log_odds_ratio": -0.6831516027450562, "logits/chosen": -0.19425611197948456, "logits/rejected": -0.20896375179290771, "logps/chosen": -1.892181396484375, "logps/rejected": -2.035057783126831, "loss": 1.3212, "nll_loss": 1.2528866529464722, "rewards/accuracies": 0.875, "rewards/chosen": -0.18921813368797302, "rewards/margins": 0.014287644997239113, "rewards/rejected": -0.2035057693719864, "step": 711 }, { "epoch": 1.949349760438056, "grad_norm": 3.6121647357940674, "learning_rate": 9.024657534246574e-07, "log_odds_chosen": 0.00693434476852417, "log_odds_ratio": -0.7768999338150024, "logits/chosen": -0.17607003450393677, "logits/rejected": -0.15569797158241272, "logps/chosen": -2.453908920288086, "logps/rejected": -2.4623279571533203, "loss": 1.2048, "nll_loss": 1.1270906925201416, "rewards/accuracies": 0.625, "rewards/chosen": -0.2453908920288086, "rewards/margins": 0.0008419044315814972, "rewards/rejected": -0.2462327927350998, "step": 712 }, { "epoch": 1.9520876112251884, "grad_norm": 4.075098037719727, "learning_rate": 9.023287671232876e-07, "log_odds_chosen": -0.0783391147851944, "log_odds_ratio": -0.9830746650695801, "logits/chosen": -0.4206785261631012, "logits/rejected": -0.3833041787147522, "logps/chosen": -2.848714828491211, "logps/rejected": -2.695244312286377, "loss": 1.2452, "nll_loss": 1.1468764543533325, "rewards/accuracies": 0.625, "rewards/chosen": -0.28487148880958557, "rewards/margins": -0.015347057953476906, "rewards/rejected": -0.2695244550704956, "step": 713 }, { "epoch": 1.9548254620123204, "grad_norm": 3.43062162399292, "learning_rate": 9.021917808219177e-07, "log_odds_chosen": -0.18048609793186188, "log_odds_ratio": -0.8406488299369812, "logits/chosen": -0.22258691489696503, "logits/rejected": -0.21751919388771057, "logps/chosen": -2.6847667694091797, "logps/rejected": -2.4984641075134277, "loss": 1.3424, "nll_loss": 1.2583394050598145, "rewards/accuracies": 0.375, "rewards/chosen": -0.268476665019989, "rewards/margins": -0.018630273640155792, "rewards/rejected": -0.24984639883041382, "step": 714 }, { "epoch": 1.9575633127994525, "grad_norm": 4.224714756011963, "learning_rate": 9.020547945205479e-07, "log_odds_chosen": 0.16444572806358337, "log_odds_ratio": -0.8929709196090698, "logits/chosen": -0.16769370436668396, "logits/rejected": -0.11979241669178009, "logps/chosen": -3.4342031478881836, "logps/rejected": -3.5871005058288574, "loss": 1.2652, "nll_loss": 1.175870418548584, "rewards/accuracies": 0.375, "rewards/chosen": -0.3434202969074249, "rewards/margins": 0.015289761126041412, "rewards/rejected": -0.35871008038520813, "step": 715 }, { "epoch": 1.9603011635865846, "grad_norm": 3.5891971588134766, "learning_rate": 9.01917808219178e-07, "log_odds_chosen": 0.9875178337097168, "log_odds_ratio": -0.711132824420929, "logits/chosen": -0.47003525495529175, "logits/rejected": -0.511734664440155, "logps/chosen": -2.746666431427002, "logps/rejected": -3.6873459815979004, "loss": 1.3975, "nll_loss": 1.3264093399047852, "rewards/accuracies": 0.625, "rewards/chosen": -0.27466660737991333, "rewards/margins": 0.09406798332929611, "rewards/rejected": -0.36873459815979004, "step": 716 }, { "epoch": 1.9630390143737166, "grad_norm": 3.6467554569244385, "learning_rate": 9.017808219178081e-07, "log_odds_chosen": 0.1818077564239502, "log_odds_ratio": -0.6454270482063293, "logits/chosen": -0.3303704559803009, "logits/rejected": -0.3364409804344177, "logps/chosen": -2.6609272956848145, "logps/rejected": -2.8092398643493652, "loss": 1.1803, "nll_loss": 1.1157668828964233, "rewards/accuracies": 0.75, "rewards/chosen": -0.2660927176475525, "rewards/margins": 0.014831296168267727, "rewards/rejected": -0.2809240221977234, "step": 717 }, { "epoch": 1.9657768651608487, "grad_norm": 3.75041127204895, "learning_rate": 9.016438356164383e-07, "log_odds_chosen": -0.03885255753993988, "log_odds_ratio": -0.7520164251327515, "logits/chosen": -0.408199667930603, "logits/rejected": -0.3746974766254425, "logps/chosen": -2.5694000720977783, "logps/rejected": -2.5089921951293945, "loss": 1.3216, "nll_loss": 1.2463842630386353, "rewards/accuracies": 0.5, "rewards/chosen": -0.25694000720977783, "rewards/margins": -0.006040787324309349, "rewards/rejected": -0.25089922547340393, "step": 718 }, { "epoch": 1.9685147159479808, "grad_norm": 3.348459243774414, "learning_rate": 9.015068493150684e-07, "log_odds_chosen": 0.3785025179386139, "log_odds_ratio": -0.5541291832923889, "logits/chosen": -0.379104882478714, "logits/rejected": -0.3457738757133484, "logps/chosen": -2.3274805545806885, "logps/rejected": -2.635066509246826, "loss": 1.2321, "nll_loss": 1.1766830682754517, "rewards/accuracies": 0.75, "rewards/chosen": -0.23274807631969452, "rewards/margins": 0.03075859323143959, "rewards/rejected": -0.2635066509246826, "step": 719 }, { "epoch": 1.9712525667351128, "grad_norm": 2.9255547523498535, "learning_rate": 9.013698630136985e-07, "log_odds_chosen": 0.7513896226882935, "log_odds_ratio": -0.4554392695426941, "logits/chosen": -0.5129657983779907, "logits/rejected": -0.5100796818733215, "logps/chosen": -2.1513285636901855, "logps/rejected": -2.823808193206787, "loss": 1.277, "nll_loss": 1.2314832210540771, "rewards/accuracies": 0.875, "rewards/chosen": -0.21513286232948303, "rewards/margins": 0.06724792718887329, "rewards/rejected": -0.2823808193206787, "step": 720 }, { "epoch": 1.973990417522245, "grad_norm": 4.4749603271484375, "learning_rate": 9.012328767123287e-07, "log_odds_chosen": -0.5541087985038757, "log_odds_ratio": -1.1966311931610107, "logits/chosen": -0.2613872289657593, "logits/rejected": -0.18153470754623413, "logps/chosen": -3.4818286895751953, "logps/rejected": -2.9531807899475098, "loss": 1.3317, "nll_loss": 1.212000846862793, "rewards/accuracies": 0.25, "rewards/chosen": -0.34818291664123535, "rewards/margins": -0.05286479368805885, "rewards/rejected": -0.2953180968761444, "step": 721 }, { "epoch": 1.976728268309377, "grad_norm": 2.8068289756774902, "learning_rate": 9.01095890410959e-07, "log_odds_chosen": 0.3221631944179535, "log_odds_ratio": -0.7203763723373413, "logits/chosen": -0.3296698033809662, "logits/rejected": -0.43480756878852844, "logps/chosen": -1.9105395078659058, "logps/rejected": -2.1811726093292236, "loss": 1.2542, "nll_loss": 1.1821883916854858, "rewards/accuracies": 0.625, "rewards/chosen": -0.19105394184589386, "rewards/margins": 0.027063317596912384, "rewards/rejected": -0.21811726689338684, "step": 722 }, { "epoch": 1.9794661190965093, "grad_norm": 4.303776264190674, "learning_rate": 9.00958904109589e-07, "log_odds_chosen": -0.2624807059764862, "log_odds_ratio": -1.1366803646087646, "logits/chosen": -0.41536471247673035, "logits/rejected": -0.4348466396331787, "logps/chosen": -3.0064587593078613, "logps/rejected": -2.7127437591552734, "loss": 1.2934, "nll_loss": 1.1797500848770142, "rewards/accuracies": 0.375, "rewards/chosen": -0.3006458878517151, "rewards/margins": -0.029371492564678192, "rewards/rejected": -0.2712743878364563, "step": 723 }, { "epoch": 1.9822039698836413, "grad_norm": 3.533033847808838, "learning_rate": 9.008219178082192e-07, "log_odds_chosen": -0.03335762023925781, "log_odds_ratio": -0.8322951793670654, "logits/chosen": -0.44237589836120605, "logits/rejected": -0.4068635106086731, "logps/chosen": -2.8047611713409424, "logps/rejected": -2.730367660522461, "loss": 1.272, "nll_loss": 1.1887619495391846, "rewards/accuracies": 0.5, "rewards/chosen": -0.28047609329223633, "rewards/margins": -0.007439346984028816, "rewards/rejected": -0.27303674817085266, "step": 724 }, { "epoch": 1.9849418206707734, "grad_norm": 3.40486216545105, "learning_rate": 9.006849315068494e-07, "log_odds_chosen": 0.12625664472579956, "log_odds_ratio": -0.7127522826194763, "logits/chosen": -0.3292944133281708, "logits/rejected": -0.29126960039138794, "logps/chosen": -2.1702442169189453, "logps/rejected": -2.249340534210205, "loss": 1.2297, "nll_loss": 1.1584737300872803, "rewards/accuracies": 0.625, "rewards/chosen": -0.21702444553375244, "rewards/margins": 0.007909618318080902, "rewards/rejected": -0.22493405640125275, "step": 725 }, { "epoch": 1.9876796714579057, "grad_norm": 4.275327682495117, "learning_rate": 9.005479452054794e-07, "log_odds_chosen": 0.16222289204597473, "log_odds_ratio": -0.7849874496459961, "logits/chosen": -0.4290551543235779, "logits/rejected": -0.49329203367233276, "logps/chosen": -2.314310073852539, "logps/rejected": -2.4436540603637695, "loss": 1.2751, "nll_loss": 1.1965692043304443, "rewards/accuracies": 0.625, "rewards/chosen": -0.2314310073852539, "rewards/margins": 0.012934349477291107, "rewards/rejected": -0.2443653792142868, "step": 726 }, { "epoch": 1.9904175222450378, "grad_norm": 3.604067802429199, "learning_rate": 9.004109589041096e-07, "log_odds_chosen": 0.6603269577026367, "log_odds_ratio": -0.5715192556381226, "logits/chosen": -0.2829665541648865, "logits/rejected": -0.2634999454021454, "logps/chosen": -2.5939888954162598, "logps/rejected": -3.208024024963379, "loss": 1.169, "nll_loss": 1.111864447593689, "rewards/accuracies": 0.75, "rewards/chosen": -0.259398877620697, "rewards/margins": 0.06140352040529251, "rewards/rejected": -0.32080239057540894, "step": 727 }, { "epoch": 1.9931553730321698, "grad_norm": 2.906543731689453, "learning_rate": 9.002739726027398e-07, "log_odds_chosen": 0.3090478479862213, "log_odds_ratio": -0.5606142282485962, "logits/chosen": -0.4017757177352905, "logits/rejected": -0.42752349376678467, "logps/chosen": -2.202503204345703, "logps/rejected": -2.4575281143188477, "loss": 1.2577, "nll_loss": 1.2016305923461914, "rewards/accuracies": 0.875, "rewards/chosen": -0.22025030851364136, "rewards/margins": 0.025502484291791916, "rewards/rejected": -0.24575279653072357, "step": 728 }, { "epoch": 1.995893223819302, "grad_norm": 3.199707508087158, "learning_rate": 9.001369863013698e-07, "log_odds_chosen": 0.11069183051586151, "log_odds_ratio": -0.7360165119171143, "logits/chosen": -0.4061204493045807, "logits/rejected": -0.44815123081207275, "logps/chosen": -2.1150126457214355, "logps/rejected": -2.1263890266418457, "loss": 1.3235, "nll_loss": 1.2498983144760132, "rewards/accuracies": 0.625, "rewards/chosen": -0.21150130033493042, "rewards/margins": 0.0011376291513442993, "rewards/rejected": -0.21263892948627472, "step": 729 }, { "epoch": 1.998631074606434, "grad_norm": 3.9456207752227783, "learning_rate": 9e-07, "log_odds_chosen": 0.8658750057220459, "log_odds_ratio": -0.7697291970252991, "logits/chosen": -0.3845444619655609, "logits/rejected": -0.4153181314468384, "logps/chosen": -2.674499988555908, "logps/rejected": -3.469935417175293, "loss": 1.2963, "nll_loss": 1.2193615436553955, "rewards/accuracies": 0.5, "rewards/chosen": -0.2674500346183777, "rewards/margins": 0.07954350113868713, "rewards/rejected": -0.3469935357570648, "step": 730 }, { "epoch": 2.001368925393566, "grad_norm": 3.7854297161102295, "learning_rate": 8.998630136986301e-07, "log_odds_chosen": 0.17566898465156555, "log_odds_ratio": -0.9251694679260254, "logits/chosen": -0.23089317977428436, "logits/rejected": -0.2577977478504181, "logps/chosen": -3.2893054485321045, "logps/rejected": -3.4667916297912598, "loss": 1.3287, "nll_loss": 1.236161708831787, "rewards/accuracies": 0.625, "rewards/chosen": -0.3289305865764618, "rewards/margins": 0.017748594284057617, "rewards/rejected": -0.3466791808605194, "step": 731 }, { "epoch": 2.004106776180698, "grad_norm": 3.966510534286499, "learning_rate": 8.997260273972603e-07, "log_odds_chosen": 0.4031149744987488, "log_odds_ratio": -0.6059730648994446, "logits/chosen": -0.2352617383003235, "logits/rejected": -0.22951661050319672, "logps/chosen": -2.83402419090271, "logps/rejected": -3.1653265953063965, "loss": 1.2553, "nll_loss": 1.194668173789978, "rewards/accuracies": 0.75, "rewards/chosen": -0.2834024131298065, "rewards/margins": 0.0331302285194397, "rewards/rejected": -0.3165326714515686, "step": 732 }, { "epoch": 2.00684462696783, "grad_norm": 3.499242067337036, "learning_rate": 8.995890410958904e-07, "log_odds_chosen": -0.013406738638877869, "log_odds_ratio": -0.7947352528572083, "logits/chosen": -0.3138563930988312, "logits/rejected": -0.27575916051864624, "logps/chosen": -2.753840684890747, "logps/rejected": -2.70320200920105, "loss": 1.2062, "nll_loss": 1.1267249584197998, "rewards/accuracies": 0.5, "rewards/chosen": -0.2753840684890747, "rewards/margins": -0.0050638597458601, "rewards/rejected": -0.27032020688056946, "step": 733 }, { "epoch": 2.0095824777549622, "grad_norm": 3.031935453414917, "learning_rate": 8.994520547945205e-07, "log_odds_chosen": 1.1838912963867188, "log_odds_ratio": -0.6280543208122253, "logits/chosen": -0.24942775070667267, "logits/rejected": -0.3168172836303711, "logps/chosen": -2.2031214237213135, "logps/rejected": -3.3120365142822266, "loss": 1.2106, "nll_loss": 1.1478137969970703, "rewards/accuracies": 0.75, "rewards/chosen": -0.22031216323375702, "rewards/margins": 0.11089149117469788, "rewards/rejected": -0.3312036693096161, "step": 734 }, { "epoch": 2.0123203285420943, "grad_norm": 3.23396372795105, "learning_rate": 8.993150684931507e-07, "log_odds_chosen": 0.04521569609642029, "log_odds_ratio": -0.8138055205345154, "logits/chosen": -0.24701961874961853, "logits/rejected": -0.3177555203437805, "logps/chosen": -2.5629072189331055, "logps/rejected": -2.589378595352173, "loss": 1.2103, "nll_loss": 1.1289030313491821, "rewards/accuracies": 0.5, "rewards/chosen": -0.2562907338142395, "rewards/margins": 0.0026471372693777084, "rewards/rejected": -0.25893786549568176, "step": 735 }, { "epoch": 2.0150581793292264, "grad_norm": 3.855433225631714, "learning_rate": 8.991780821917808e-07, "log_odds_chosen": 0.05101408809423447, "log_odds_ratio": -0.7582480311393738, "logits/chosen": -0.26620644330978394, "logits/rejected": -0.2333371639251709, "logps/chosen": -2.407956123352051, "logps/rejected": -2.3832502365112305, "loss": 1.2477, "nll_loss": 1.1718519926071167, "rewards/accuracies": 0.625, "rewards/chosen": -0.24079559743404388, "rewards/margins": -0.002470560371875763, "rewards/rejected": -0.23832504451274872, "step": 736 }, { "epoch": 2.0177960301163584, "grad_norm": 2.6049554347991943, "learning_rate": 8.990410958904109e-07, "log_odds_chosen": 0.6805986166000366, "log_odds_ratio": -0.4635567367076874, "logits/chosen": -0.20874328911304474, "logits/rejected": -0.27549436688423157, "logps/chosen": -1.6908981800079346, "logps/rejected": -2.2671711444854736, "loss": 1.253, "nll_loss": 1.206594705581665, "rewards/accuracies": 0.875, "rewards/chosen": -0.16908980906009674, "rewards/margins": 0.05762731283903122, "rewards/rejected": -0.22671714425086975, "step": 737 }, { "epoch": 2.020533880903491, "grad_norm": 2.9024951457977295, "learning_rate": 8.989041095890411e-07, "log_odds_chosen": 0.012991532683372498, "log_odds_ratio": -0.7339613437652588, "logits/chosen": -0.2616117000579834, "logits/rejected": -0.2631676197052002, "logps/chosen": -1.8991434574127197, "logps/rejected": -1.8921005725860596, "loss": 1.2062, "nll_loss": 1.132784366607666, "rewards/accuracies": 0.625, "rewards/chosen": -0.18991437554359436, "rewards/margins": -0.0007042950019240379, "rewards/rejected": -0.18921005725860596, "step": 738 }, { "epoch": 2.023271731690623, "grad_norm": 3.7772035598754883, "learning_rate": 8.987671232876713e-07, "log_odds_chosen": -0.27339255809783936, "log_odds_ratio": -1.1572370529174805, "logits/chosen": -0.1882859468460083, "logits/rejected": -0.20115762948989868, "logps/chosen": -3.2968087196350098, "logps/rejected": -2.9533252716064453, "loss": 1.312, "nll_loss": 1.1962883472442627, "rewards/accuracies": 0.75, "rewards/chosen": -0.329680860042572, "rewards/margins": -0.03434832766652107, "rewards/rejected": -0.29533255100250244, "step": 739 }, { "epoch": 2.026009582477755, "grad_norm": 3.402667999267578, "learning_rate": 8.986301369863013e-07, "log_odds_chosen": 0.4066305160522461, "log_odds_ratio": -0.6196190118789673, "logits/chosen": -0.3475215435028076, "logits/rejected": -0.3554339110851288, "logps/chosen": -2.5732154846191406, "logps/rejected": -2.9427132606506348, "loss": 1.212, "nll_loss": 1.1500011682510376, "rewards/accuracies": 0.75, "rewards/chosen": -0.2573215365409851, "rewards/margins": 0.036949776113033295, "rewards/rejected": -0.2942713499069214, "step": 740 }, { "epoch": 2.028747433264887, "grad_norm": 3.355358362197876, "learning_rate": 8.984931506849315e-07, "log_odds_chosen": 0.18137109279632568, "log_odds_ratio": -0.8723700046539307, "logits/chosen": -0.14156168699264526, "logits/rejected": -0.18999673426151276, "logps/chosen": -2.5458333492279053, "logps/rejected": -2.648589849472046, "loss": 1.1433, "nll_loss": 1.0560870170593262, "rewards/accuracies": 0.75, "rewards/chosen": -0.25458335876464844, "rewards/margins": 0.010275620967149734, "rewards/rejected": -0.2648589611053467, "step": 741 }, { "epoch": 2.0314852840520192, "grad_norm": 3.6582109928131104, "learning_rate": 8.983561643835617e-07, "log_odds_chosen": 0.12223246693611145, "log_odds_ratio": -0.7815863490104675, "logits/chosen": -0.37083619832992554, "logits/rejected": -0.378887414932251, "logps/chosen": -2.984464645385742, "logps/rejected": -3.0796122550964355, "loss": 1.2835, "nll_loss": 1.205369472503662, "rewards/accuracies": 0.625, "rewards/chosen": -0.2984464764595032, "rewards/margins": 0.009514760226011276, "rewards/rejected": -0.30796122550964355, "step": 742 }, { "epoch": 2.0342231348391513, "grad_norm": 3.5351691246032715, "learning_rate": 8.982191780821917e-07, "log_odds_chosen": -0.6407381296157837, "log_odds_ratio": -1.169508457183838, "logits/chosen": -0.27901849150657654, "logits/rejected": -0.26629015803337097, "logps/chosen": -3.117702007293701, "logps/rejected": -2.477609872817993, "loss": 1.2629, "nll_loss": 1.1459137201309204, "rewards/accuracies": 0.25, "rewards/chosen": -0.3117702007293701, "rewards/margins": -0.06400923430919647, "rewards/rejected": -0.24776098132133484, "step": 743 }, { "epoch": 2.0369609856262834, "grad_norm": 3.094059705734253, "learning_rate": 8.980821917808219e-07, "log_odds_chosen": -0.5046650171279907, "log_odds_ratio": -1.2210419178009033, "logits/chosen": -0.2067049741744995, "logits/rejected": -0.24045059084892273, "logps/chosen": -2.312427043914795, "logps/rejected": -1.8128414154052734, "loss": 1.3713, "nll_loss": 1.2491629123687744, "rewards/accuracies": 0.5, "rewards/chosen": -0.23124273121356964, "rewards/margins": -0.04995858669281006, "rewards/rejected": -0.18128414452075958, "step": 744 }, { "epoch": 2.0396988364134154, "grad_norm": 2.5983386039733887, "learning_rate": 8.97945205479452e-07, "log_odds_chosen": 1.0385183095932007, "log_odds_ratio": -0.3522033393383026, "logits/chosen": -0.12984097003936768, "logits/rejected": -0.22236375510692596, "logps/chosen": -1.8370190858840942, "logps/rejected": -2.7867822647094727, "loss": 1.1741, "nll_loss": 1.1388952732086182, "rewards/accuracies": 0.875, "rewards/chosen": -0.18370190262794495, "rewards/margins": 0.09497633576393127, "rewards/rejected": -0.2786782383918762, "step": 745 }, { "epoch": 2.0424366872005475, "grad_norm": 4.212039947509766, "learning_rate": 8.978082191780822e-07, "log_odds_chosen": -0.3224409818649292, "log_odds_ratio": -1.0085313320159912, "logits/chosen": -0.18184149265289307, "logits/rejected": -0.14824770390987396, "logps/chosen": -3.145155429840088, "logps/rejected": -2.838336229324341, "loss": 1.1839, "nll_loss": 1.0830048322677612, "rewards/accuracies": 0.375, "rewards/chosen": -0.3145155608654022, "rewards/margins": -0.030681917443871498, "rewards/rejected": -0.2838336229324341, "step": 746 }, { "epoch": 2.0451745379876796, "grad_norm": 3.0553739070892334, "learning_rate": 8.976712328767123e-07, "log_odds_chosen": 0.12906304001808167, "log_odds_ratio": -0.7080461382865906, "logits/chosen": -0.29929521679878235, "logits/rejected": -0.3187829256057739, "logps/chosen": -2.3270177841186523, "logps/rejected": -2.4108786582946777, "loss": 1.1717, "nll_loss": 1.100900650024414, "rewards/accuracies": 0.75, "rewards/chosen": -0.23270177841186523, "rewards/margins": 0.008386070840060711, "rewards/rejected": -0.24108785390853882, "step": 747 }, { "epoch": 2.0479123887748116, "grad_norm": 2.718012809753418, "learning_rate": 8.975342465753424e-07, "log_odds_chosen": 0.7645596265792847, "log_odds_ratio": -0.5287569761276245, "logits/chosen": -0.3120652139186859, "logits/rejected": -0.3692845106124878, "logps/chosen": -2.0283453464508057, "logps/rejected": -2.6652464866638184, "loss": 1.2416, "nll_loss": 1.1886847019195557, "rewards/accuracies": 0.625, "rewards/chosen": -0.20283454656600952, "rewards/margins": 0.06369011104106903, "rewards/rejected": -0.26652464270591736, "step": 748 }, { "epoch": 2.0506502395619437, "grad_norm": 4.496124267578125, "learning_rate": 8.973972602739726e-07, "log_odds_chosen": -0.4973878562450409, "log_odds_ratio": -1.19907546043396, "logits/chosen": -0.27078723907470703, "logits/rejected": -0.21627402305603027, "logps/chosen": -3.709355354309082, "logps/rejected": -3.2087526321411133, "loss": 1.2146, "nll_loss": 1.0946892499923706, "rewards/accuracies": 0.375, "rewards/chosen": -0.37093549966812134, "rewards/margins": -0.05006024241447449, "rewards/rejected": -0.32087528705596924, "step": 749 }, { "epoch": 2.0533880903490758, "grad_norm": 3.4273648262023926, "learning_rate": 8.972602739726027e-07, "log_odds_chosen": -0.2029741108417511, "log_odds_ratio": -0.9773905873298645, "logits/chosen": -0.17498189210891724, "logits/rejected": -0.14655807614326477, "logps/chosen": -2.4636969566345215, "logps/rejected": -2.2425222396850586, "loss": 1.2509, "nll_loss": 1.1531407833099365, "rewards/accuracies": 0.5, "rewards/chosen": -0.24636970460414886, "rewards/margins": -0.022117463871836662, "rewards/rejected": -0.22425225377082825, "step": 750 }, { "epoch": 2.0561259411362083, "grad_norm": 3.681262493133545, "learning_rate": 8.971232876712328e-07, "log_odds_chosen": 0.1926642656326294, "log_odds_ratio": -0.7473519444465637, "logits/chosen": -0.1875031441450119, "logits/rejected": -0.23209434747695923, "logps/chosen": -2.552682638168335, "logps/rejected": -2.7174782752990723, "loss": 1.2749, "nll_loss": 1.2001917362213135, "rewards/accuracies": 0.75, "rewards/chosen": -0.25526827573776245, "rewards/margins": 0.01647956296801567, "rewards/rejected": -0.2717478275299072, "step": 751 }, { "epoch": 2.0588637919233403, "grad_norm": 2.8308138847351074, "learning_rate": 8.96986301369863e-07, "log_odds_chosen": 0.2112082988023758, "log_odds_ratio": -0.7349548935890198, "logits/chosen": -0.15141688287258148, "logits/rejected": -0.20856791734695435, "logps/chosen": -2.298917293548584, "logps/rejected": -2.4525070190429688, "loss": 1.26, "nll_loss": 1.1865136623382568, "rewards/accuracies": 0.5, "rewards/chosen": -0.22989173233509064, "rewards/margins": 0.015358982607722282, "rewards/rejected": -0.24525070190429688, "step": 752 }, { "epoch": 2.0616016427104724, "grad_norm": 3.811318874359131, "learning_rate": 8.968493150684932e-07, "log_odds_chosen": -0.08441556990146637, "log_odds_ratio": -0.8178657293319702, "logits/chosen": -0.3349706828594208, "logits/rejected": -0.2702355682849884, "logps/chosen": -2.442626953125, "logps/rejected": -2.321697235107422, "loss": 1.2208, "nll_loss": 1.13901686668396, "rewards/accuracies": 0.375, "rewards/chosen": -0.2442627102136612, "rewards/margins": -0.012092987075448036, "rewards/rejected": -0.2321697175502777, "step": 753 }, { "epoch": 2.0643394934976045, "grad_norm": 3.5929293632507324, "learning_rate": 8.967123287671232e-07, "log_odds_chosen": 0.16774624586105347, "log_odds_ratio": -0.719016432762146, "logits/chosen": -0.28279128670692444, "logits/rejected": -0.33403676748275757, "logps/chosen": -2.752232551574707, "logps/rejected": -2.9002175331115723, "loss": 1.2372, "nll_loss": 1.1652836799621582, "rewards/accuracies": 0.625, "rewards/chosen": -0.2752232551574707, "rewards/margins": 0.0147984828799963, "rewards/rejected": -0.29002174735069275, "step": 754 }, { "epoch": 2.0670773442847366, "grad_norm": 4.086679935455322, "learning_rate": 8.965753424657534e-07, "log_odds_chosen": -0.19233295321464539, "log_odds_ratio": -1.0057586431503296, "logits/chosen": -0.15289781987667084, "logits/rejected": -0.19054874777793884, "logps/chosen": -2.930845260620117, "logps/rejected": -2.6873273849487305, "loss": 1.1775, "nll_loss": 1.0769413709640503, "rewards/accuracies": 0.625, "rewards/chosen": -0.2930845320224762, "rewards/margins": -0.024351784959435463, "rewards/rejected": -0.2687327265739441, "step": 755 }, { "epoch": 2.0698151950718686, "grad_norm": 3.218721866607666, "learning_rate": 8.964383561643836e-07, "log_odds_chosen": -0.10643010586500168, "log_odds_ratio": -0.8379544019699097, "logits/chosen": -0.16883081197738647, "logits/rejected": -0.16088367998600006, "logps/chosen": -2.5611419677734375, "logps/rejected": -2.453512668609619, "loss": 1.2215, "nll_loss": 1.137696623802185, "rewards/accuracies": 0.5, "rewards/chosen": -0.2561141848564148, "rewards/margins": -0.010762904770672321, "rewards/rejected": -0.24535128474235535, "step": 756 }, { "epoch": 2.0725530458590007, "grad_norm": 4.179019927978516, "learning_rate": 8.963013698630136e-07, "log_odds_chosen": 0.2991503179073334, "log_odds_ratio": -0.7486932277679443, "logits/chosen": -0.2833600640296936, "logits/rejected": -0.2934255599975586, "logps/chosen": -3.307755708694458, "logps/rejected": -3.572084665298462, "loss": 1.3189, "nll_loss": 1.244053602218628, "rewards/accuracies": 0.625, "rewards/chosen": -0.33077558875083923, "rewards/margins": 0.026432868093252182, "rewards/rejected": -0.3572084605693817, "step": 757 }, { "epoch": 2.0752908966461328, "grad_norm": 3.9277546405792236, "learning_rate": 8.961643835616438e-07, "log_odds_chosen": 0.4222169816493988, "log_odds_ratio": -0.6054429411888123, "logits/chosen": -0.11418693512678146, "logits/rejected": -0.06263267993927002, "logps/chosen": -2.4473462104797363, "logps/rejected": -2.826887369155884, "loss": 1.1628, "nll_loss": 1.102285623550415, "rewards/accuracies": 0.625, "rewards/chosen": -0.24473461508750916, "rewards/margins": 0.037954121828079224, "rewards/rejected": -0.2826887369155884, "step": 758 }, { "epoch": 2.078028747433265, "grad_norm": 2.954317092895508, "learning_rate": 8.96027397260274e-07, "log_odds_chosen": 1.060511827468872, "log_odds_ratio": -0.3840029537677765, "logits/chosen": -0.08898914605379105, "logits/rejected": -0.09521341323852539, "logps/chosen": -2.00484299659729, "logps/rejected": -2.980362892150879, "loss": 1.0834, "nll_loss": 1.0449596643447876, "rewards/accuracies": 0.875, "rewards/chosen": -0.20048430562019348, "rewards/margins": 0.09755198657512665, "rewards/rejected": -0.29803627729415894, "step": 759 }, { "epoch": 2.080766598220397, "grad_norm": 3.2279889583587646, "learning_rate": 8.958904109589041e-07, "log_odds_chosen": 0.14243477582931519, "log_odds_ratio": -0.81766676902771, "logits/chosen": -0.2609129548072815, "logits/rejected": -0.2453298568725586, "logps/chosen": -2.9173340797424316, "logps/rejected": -3.0451807975769043, "loss": 1.2642, "nll_loss": 1.1824580430984497, "rewards/accuracies": 0.375, "rewards/chosen": -0.29173338413238525, "rewards/margins": 0.012784678488969803, "rewards/rejected": -0.30451810359954834, "step": 760 }, { "epoch": 2.083504449007529, "grad_norm": 2.910715341567993, "learning_rate": 8.957534246575342e-07, "log_odds_chosen": 0.3128640353679657, "log_odds_ratio": -0.5834141969680786, "logits/chosen": -0.20296788215637207, "logits/rejected": -0.24525031447410583, "logps/chosen": -2.0313501358032227, "logps/rejected": -2.317115068435669, "loss": 1.2423, "nll_loss": 1.1839739084243774, "rewards/accuracies": 0.75, "rewards/chosen": -0.20313501358032227, "rewards/margins": 0.028576498851180077, "rewards/rejected": -0.2317115217447281, "step": 761 }, { "epoch": 2.086242299794661, "grad_norm": 2.9169037342071533, "learning_rate": 8.956164383561643e-07, "log_odds_chosen": 0.37476855516433716, "log_odds_ratio": -0.7744293808937073, "logits/chosen": -0.16459250450134277, "logits/rejected": -0.1741580367088318, "logps/chosen": -2.2421047687530518, "logps/rejected": -2.533336639404297, "loss": 1.2623, "nll_loss": 1.1848379373550415, "rewards/accuracies": 0.875, "rewards/chosen": -0.2242104709148407, "rewards/margins": 0.029123201966285706, "rewards/rejected": -0.2533336877822876, "step": 762 }, { "epoch": 2.088980150581793, "grad_norm": 3.0295138359069824, "learning_rate": 8.954794520547945e-07, "log_odds_chosen": 0.4394742250442505, "log_odds_ratio": -0.5714805722236633, "logits/chosen": -0.20004898309707642, "logits/rejected": -0.2167324423789978, "logps/chosen": -1.9516253471374512, "logps/rejected": -2.3130111694335938, "loss": 1.25, "nll_loss": 1.19281005859375, "rewards/accuracies": 0.625, "rewards/chosen": -0.19516253471374512, "rewards/margins": 0.03613857924938202, "rewards/rejected": -0.23130109906196594, "step": 763 }, { "epoch": 2.0917180013689256, "grad_norm": 2.923671245574951, "learning_rate": 8.953424657534246e-07, "log_odds_chosen": 0.17288976907730103, "log_odds_ratio": -0.7332188487052917, "logits/chosen": -0.389191210269928, "logits/rejected": -0.4417943060398102, "logps/chosen": -2.4237160682678223, "logps/rejected": -2.5420894622802734, "loss": 1.2071, "nll_loss": 1.1337352991104126, "rewards/accuracies": 0.625, "rewards/chosen": -0.24237161874771118, "rewards/margins": 0.011837340891361237, "rewards/rejected": -0.2542089819908142, "step": 764 }, { "epoch": 2.0944558521560577, "grad_norm": 3.3581013679504395, "learning_rate": 8.952054794520547e-07, "log_odds_chosen": 0.9163177013397217, "log_odds_ratio": -0.4136762022972107, "logits/chosen": -0.3526359796524048, "logits/rejected": -0.3519701659679413, "logps/chosen": -2.001035213470459, "logps/rejected": -2.819439649581909, "loss": 1.207, "nll_loss": 1.1656641960144043, "rewards/accuracies": 0.875, "rewards/chosen": -0.2001035213470459, "rewards/margins": 0.08184045553207397, "rewards/rejected": -0.2819439768791199, "step": 765 }, { "epoch": 2.0971937029431897, "grad_norm": 2.9433279037475586, "learning_rate": 8.950684931506849e-07, "log_odds_chosen": 0.661331057548523, "log_odds_ratio": -0.44651615619659424, "logits/chosen": -0.09312275052070618, "logits/rejected": -0.11556186527013779, "logps/chosen": -2.3421711921691895, "logps/rejected": -2.945751667022705, "loss": 1.181, "nll_loss": 1.1363210678100586, "rewards/accuracies": 0.875, "rewards/chosen": -0.23421713709831238, "rewards/margins": 0.060358040034770966, "rewards/rejected": -0.29457515478134155, "step": 766 }, { "epoch": 2.099931553730322, "grad_norm": 3.520134687423706, "learning_rate": 8.949315068493151e-07, "log_odds_chosen": 0.279226690530777, "log_odds_ratio": -0.8396889567375183, "logits/chosen": -0.16332414746284485, "logits/rejected": -0.08135007321834564, "logps/chosen": -2.531637668609619, "logps/rejected": -2.7546000480651855, "loss": 1.1929, "nll_loss": 1.1089802980422974, "rewards/accuracies": 0.75, "rewards/chosen": -0.25316375494003296, "rewards/margins": 0.022296255454421043, "rewards/rejected": -0.27546000480651855, "step": 767 }, { "epoch": 2.102669404517454, "grad_norm": 3.748908758163452, "learning_rate": 8.947945205479451e-07, "log_odds_chosen": 1.0379955768585205, "log_odds_ratio": -0.8512529134750366, "logits/chosen": -0.19837841391563416, "logits/rejected": -0.19300015270709991, "logps/chosen": -2.2529313564300537, "logps/rejected": -3.187108039855957, "loss": 1.2385, "nll_loss": 1.153389811515808, "rewards/accuracies": 0.625, "rewards/chosen": -0.22529315948486328, "rewards/margins": 0.09341765940189362, "rewards/rejected": -0.3187108039855957, "step": 768 }, { "epoch": 2.105407255304586, "grad_norm": 2.9120137691497803, "learning_rate": 8.946575342465753e-07, "log_odds_chosen": 0.3428221344947815, "log_odds_ratio": -0.7247883081436157, "logits/chosen": -0.13356107473373413, "logits/rejected": -0.18477827310562134, "logps/chosen": -2.3674001693725586, "logps/rejected": -2.674414873123169, "loss": 1.2245, "nll_loss": 1.152048945426941, "rewards/accuracies": 0.75, "rewards/chosen": -0.23674002289772034, "rewards/margins": 0.03070145659148693, "rewards/rejected": -0.2674414813518524, "step": 769 }, { "epoch": 2.108145106091718, "grad_norm": 2.887843370437622, "learning_rate": 8.945205479452055e-07, "log_odds_chosen": 0.3951650857925415, "log_odds_ratio": -0.5841624736785889, "logits/chosen": -0.343359112739563, "logits/rejected": -0.3970956802368164, "logps/chosen": -2.2189488410949707, "logps/rejected": -2.5237741470336914, "loss": 1.1652, "nll_loss": 1.1067816019058228, "rewards/accuracies": 0.875, "rewards/chosen": -0.22189490497112274, "rewards/margins": 0.030482519418001175, "rewards/rejected": -0.25237739086151123, "step": 770 }, { "epoch": 2.11088295687885, "grad_norm": 3.4974889755249023, "learning_rate": 8.943835616438355e-07, "log_odds_chosen": 1.5564707517623901, "log_odds_ratio": -0.7185657024383545, "logits/chosen": -0.16582825779914856, "logits/rejected": -0.16106539964675903, "logps/chosen": -2.6255404949188232, "logps/rejected": -4.062534332275391, "loss": 1.1103, "nll_loss": 1.0384749174118042, "rewards/accuracies": 0.75, "rewards/chosen": -0.2625540792942047, "rewards/margins": 0.14369933307170868, "rewards/rejected": -0.4062533974647522, "step": 771 }, { "epoch": 2.113620807665982, "grad_norm": 3.7036232948303223, "learning_rate": 8.942465753424657e-07, "log_odds_chosen": -0.506956934928894, "log_odds_ratio": -1.1221404075622559, "logits/chosen": -0.3784157633781433, "logits/rejected": -0.39122915267944336, "logps/chosen": -2.8172762393951416, "logps/rejected": -2.3108742237091064, "loss": 1.2879, "nll_loss": 1.1757056713104248, "rewards/accuracies": 0.375, "rewards/chosen": -0.2817276120185852, "rewards/margins": -0.05064019933342934, "rewards/rejected": -0.23108741641044617, "step": 772 }, { "epoch": 2.116358658453114, "grad_norm": 3.5829825401306152, "learning_rate": 8.941095890410959e-07, "log_odds_chosen": -0.010569501668214798, "log_odds_ratio": -0.776755690574646, "logits/chosen": -0.2846038341522217, "logits/rejected": -0.2686532139778137, "logps/chosen": -2.12616229057312, "logps/rejected": -2.11358904838562, "loss": 1.2435, "nll_loss": 1.1658332347869873, "rewards/accuracies": 0.5, "rewards/chosen": -0.2126162350177765, "rewards/margins": -0.0012573283165693283, "rewards/rejected": -0.211358904838562, "step": 773 }, { "epoch": 2.1190965092402463, "grad_norm": 3.346693277359009, "learning_rate": 8.93972602739726e-07, "log_odds_chosen": 0.12593159079551697, "log_odds_ratio": -0.749460756778717, "logits/chosen": -0.1614973545074463, "logits/rejected": -0.13826215267181396, "logps/chosen": -2.571735382080078, "logps/rejected": -2.6769919395446777, "loss": 1.1339, "nll_loss": 1.058933973312378, "rewards/accuracies": 0.625, "rewards/chosen": -0.2571735382080078, "rewards/margins": 0.010525654070079327, "rewards/rejected": -0.2676992118358612, "step": 774 }, { "epoch": 2.1218343600273784, "grad_norm": 3.691711187362671, "learning_rate": 8.938356164383561e-07, "log_odds_chosen": -0.21144512295722961, "log_odds_ratio": -1.0617766380310059, "logits/chosen": -0.14233717322349548, "logits/rejected": -0.10090502351522446, "logps/chosen": -3.20296573638916, "logps/rejected": -2.947152853012085, "loss": 1.1598, "nll_loss": 1.0535835027694702, "rewards/accuracies": 0.5, "rewards/chosen": -0.3202965557575226, "rewards/margins": -0.02558128349483013, "rewards/rejected": -0.2947152853012085, "step": 775 }, { "epoch": 2.1245722108145104, "grad_norm": 3.332902193069458, "learning_rate": 8.936986301369862e-07, "log_odds_chosen": -0.30334019660949707, "log_odds_ratio": -0.9654768705368042, "logits/chosen": -0.2890825569629669, "logits/rejected": -0.2992725074291229, "logps/chosen": -2.883452892303467, "logps/rejected": -2.580519199371338, "loss": 1.3277, "nll_loss": 1.2311593294143677, "rewards/accuracies": 0.375, "rewards/chosen": -0.2883452773094177, "rewards/margins": -0.030293360352516174, "rewards/rejected": -0.25805193185806274, "step": 776 }, { "epoch": 2.1273100616016425, "grad_norm": 4.511305332183838, "learning_rate": 8.935616438356164e-07, "log_odds_chosen": -0.2585969567298889, "log_odds_ratio": -1.0466241836547852, "logits/chosen": -0.34693118929862976, "logits/rejected": -0.3607734441757202, "logps/chosen": -3.108565092086792, "logps/rejected": -2.8211328983306885, "loss": 1.1708, "nll_loss": 1.066124677658081, "rewards/accuracies": 0.625, "rewards/chosen": -0.31085652112960815, "rewards/margins": -0.028743229806423187, "rewards/rejected": -0.28211331367492676, "step": 777 }, { "epoch": 2.130047912388775, "grad_norm": 3.0022830963134766, "learning_rate": 8.934246575342465e-07, "log_odds_chosen": 0.10263031721115112, "log_odds_ratio": -0.6935178637504578, "logits/chosen": -0.11038808524608612, "logits/rejected": -0.18006327748298645, "logps/chosen": -2.1897401809692383, "logps/rejected": -2.266444683074951, "loss": 1.2867, "nll_loss": 1.2173787355422974, "rewards/accuracies": 0.625, "rewards/chosen": -0.2189740240573883, "rewards/margins": 0.0076704490929841995, "rewards/rejected": -0.22664448618888855, "step": 778 }, { "epoch": 2.132785763175907, "grad_norm": 3.626646041870117, "learning_rate": 8.932876712328766e-07, "log_odds_chosen": 0.14277975261211395, "log_odds_ratio": -0.7306528687477112, "logits/chosen": -0.3323051631450653, "logits/rejected": -0.4019501805305481, "logps/chosen": -2.628307819366455, "logps/rejected": -2.7354423999786377, "loss": 1.2463, "nll_loss": 1.1732603311538696, "rewards/accuracies": 0.625, "rewards/chosen": -0.26283079385757446, "rewards/margins": 0.010713450610637665, "rewards/rejected": -0.2735442519187927, "step": 779 }, { "epoch": 2.135523613963039, "grad_norm": 2.7013251781463623, "learning_rate": 8.931506849315068e-07, "log_odds_chosen": 1.5508381128311157, "log_odds_ratio": -0.30901578068733215, "logits/chosen": -0.2386077344417572, "logits/rejected": -0.4579322934150696, "logps/chosen": -1.7389085292816162, "logps/rejected": -3.138711929321289, "loss": 1.1547, "nll_loss": 1.1237655878067017, "rewards/accuracies": 0.875, "rewards/chosen": -0.1738908588886261, "rewards/margins": 0.13998033106327057, "rewards/rejected": -0.31387120485305786, "step": 780 }, { "epoch": 2.138261464750171, "grad_norm": 3.7213661670684814, "learning_rate": 8.93013698630137e-07, "log_odds_chosen": 0.2657528817653656, "log_odds_ratio": -0.6211344003677368, "logits/chosen": -0.23830316960811615, "logits/rejected": -0.2546798884868622, "logps/chosen": -2.4674553871154785, "logps/rejected": -2.711550712585449, "loss": 1.1931, "nll_loss": 1.1309797763824463, "rewards/accuracies": 0.625, "rewards/chosen": -0.2467455416917801, "rewards/margins": 0.024409551173448563, "rewards/rejected": -0.27115508913993835, "step": 781 }, { "epoch": 2.1409993155373033, "grad_norm": 2.9815049171447754, "learning_rate": 8.92876712328767e-07, "log_odds_chosen": 0.5696236491203308, "log_odds_ratio": -0.5776989459991455, "logits/chosen": -0.22472620010375977, "logits/rejected": -0.25622430443763733, "logps/chosen": -2.289020299911499, "logps/rejected": -2.8342902660369873, "loss": 1.2417, "nll_loss": 1.1839081048965454, "rewards/accuracies": 0.625, "rewards/chosen": -0.22890202701091766, "rewards/margins": 0.05452701076865196, "rewards/rejected": -0.28342902660369873, "step": 782 }, { "epoch": 2.1437371663244353, "grad_norm": 3.118095636367798, "learning_rate": 8.927397260273972e-07, "log_odds_chosen": 0.5671663284301758, "log_odds_ratio": -0.5272344350814819, "logits/chosen": -0.20679442584514618, "logits/rejected": -0.27646148204803467, "logps/chosen": -1.906483769416809, "logps/rejected": -2.363074541091919, "loss": 1.1966, "nll_loss": 1.1438325643539429, "rewards/accuracies": 0.75, "rewards/chosen": -0.1906483769416809, "rewards/margins": 0.04565909504890442, "rewards/rejected": -0.23630747199058533, "step": 783 }, { "epoch": 2.1464750171115674, "grad_norm": 2.7404985427856445, "learning_rate": 8.926027397260274e-07, "log_odds_chosen": 1.089818000793457, "log_odds_ratio": -0.4195016324520111, "logits/chosen": -0.16034826636314392, "logits/rejected": -0.3084985017776489, "logps/chosen": -1.9374563694000244, "logps/rejected": -2.8986568450927734, "loss": 1.1234, "nll_loss": 1.081444263458252, "rewards/accuracies": 0.75, "rewards/chosen": -0.19374564290046692, "rewards/margins": 0.09612004458904266, "rewards/rejected": -0.2898656725883484, "step": 784 }, { "epoch": 2.1492128678986995, "grad_norm": 3.294726848602295, "learning_rate": 8.924657534246574e-07, "log_odds_chosen": -0.2304578721523285, "log_odds_ratio": -0.9631335735321045, "logits/chosen": -0.2354559600353241, "logits/rejected": -0.1875310242176056, "logps/chosen": -2.4348645210266113, "logps/rejected": -2.2230618000030518, "loss": 1.2457, "nll_loss": 1.1493446826934814, "rewards/accuracies": 0.625, "rewards/chosen": -0.2434864491224289, "rewards/margins": -0.021180273965001106, "rewards/rejected": -0.22230619192123413, "step": 785 }, { "epoch": 2.1519507186858315, "grad_norm": 3.332702159881592, "learning_rate": 8.923287671232876e-07, "log_odds_chosen": 0.24997591972351074, "log_odds_ratio": -0.97978276014328, "logits/chosen": -0.21222606301307678, "logits/rejected": -0.18222694098949432, "logps/chosen": -2.955056667327881, "logps/rejected": -3.1717913150787354, "loss": 1.2506, "nll_loss": 1.1525940895080566, "rewards/accuracies": 0.75, "rewards/chosen": -0.29550567269325256, "rewards/margins": 0.021673455834388733, "rewards/rejected": -0.3171791434288025, "step": 786 }, { "epoch": 2.1546885694729636, "grad_norm": 2.8741867542266846, "learning_rate": 8.921917808219178e-07, "log_odds_chosen": -0.012974634766578674, "log_odds_ratio": -0.782257080078125, "logits/chosen": -0.11826728284358978, "logits/rejected": -0.15543939173221588, "logps/chosen": -2.3374764919281006, "logps/rejected": -2.3072731494903564, "loss": 1.1608, "nll_loss": 1.082566738128662, "rewards/accuracies": 0.625, "rewards/chosen": -0.23374763131141663, "rewards/margins": -0.003020324744284153, "rewards/rejected": -0.23072731494903564, "step": 787 }, { "epoch": 2.1574264202600957, "grad_norm": 3.9113409519195557, "learning_rate": 8.920547945205479e-07, "log_odds_chosen": -0.3279714584350586, "log_odds_ratio": -1.1169672012329102, "logits/chosen": -0.1674812138080597, "logits/rejected": -0.14601945877075195, "logps/chosen": -2.7879366874694824, "logps/rejected": -2.423762321472168, "loss": 1.2114, "nll_loss": 1.0996596813201904, "rewards/accuracies": 0.5, "rewards/chosen": -0.2787936329841614, "rewards/margins": -0.0364174023270607, "rewards/rejected": -0.24237625300884247, "step": 788 }, { "epoch": 2.1601642710472277, "grad_norm": 3.338165760040283, "learning_rate": 8.91917808219178e-07, "log_odds_chosen": 0.4638398289680481, "log_odds_ratio": -0.7551308870315552, "logits/chosen": -0.11049741506576538, "logits/rejected": -0.1564701944589615, "logps/chosen": -2.3677685260772705, "logps/rejected": -2.8028481006622314, "loss": 1.2414, "nll_loss": 1.1659314632415771, "rewards/accuracies": 0.5, "rewards/chosen": -0.23677688837051392, "rewards/margins": 0.04350794106721878, "rewards/rejected": -0.2802847921848297, "step": 789 }, { "epoch": 2.1629021218343603, "grad_norm": 2.901881694793701, "learning_rate": 8.917808219178081e-07, "log_odds_chosen": 0.46482378244400024, "log_odds_ratio": -0.6638022661209106, "logits/chosen": -0.20815351605415344, "logits/rejected": -0.267879456281662, "logps/chosen": -2.0874242782592773, "logps/rejected": -2.474524974822998, "loss": 1.2037, "nll_loss": 1.1372853517532349, "rewards/accuracies": 0.625, "rewards/chosen": -0.2087424248456955, "rewards/margins": 0.03871007263660431, "rewards/rejected": -0.247452512383461, "step": 790 }, { "epoch": 2.1656399726214923, "grad_norm": 4.37553596496582, "learning_rate": 8.916438356164383e-07, "log_odds_chosen": 0.05785007029771805, "log_odds_ratio": -0.8270888328552246, "logits/chosen": -0.16708986461162567, "logits/rejected": -0.22320085763931274, "logps/chosen": -2.4423627853393555, "logps/rejected": -2.4716780185699463, "loss": 1.2242, "nll_loss": 1.141479253768921, "rewards/accuracies": 0.75, "rewards/chosen": -0.24423626065254211, "rewards/margins": 0.00293152779340744, "rewards/rejected": -0.24716781079769135, "step": 791 }, { "epoch": 2.1683778234086244, "grad_norm": 3.774381160736084, "learning_rate": 8.915068493150684e-07, "log_odds_chosen": -0.302019864320755, "log_odds_ratio": -1.0921645164489746, "logits/chosen": -0.09063136577606201, "logits/rejected": -0.04464815557003021, "logps/chosen": -3.5664055347442627, "logps/rejected": -3.2323827743530273, "loss": 1.2538, "nll_loss": 1.1445403099060059, "rewards/accuracies": 0.375, "rewards/chosen": -0.3566405773162842, "rewards/margins": -0.03340225666761398, "rewards/rejected": -0.3232383131980896, "step": 792 }, { "epoch": 2.1711156741957565, "grad_norm": 2.767772912979126, "learning_rate": 8.913698630136985e-07, "log_odds_chosen": 0.6971133947372437, "log_odds_ratio": -0.5596455931663513, "logits/chosen": -0.15912765264511108, "logits/rejected": -0.2249632477760315, "logps/chosen": -2.0867536067962646, "logps/rejected": -2.7230639457702637, "loss": 1.2512, "nll_loss": 1.1952731609344482, "rewards/accuracies": 0.875, "rewards/chosen": -0.208675354719162, "rewards/margins": 0.06363105028867722, "rewards/rejected": -0.2723064124584198, "step": 793 }, { "epoch": 2.1738535249828885, "grad_norm": 2.9592363834381104, "learning_rate": 8.912328767123287e-07, "log_odds_chosen": 0.681065559387207, "log_odds_ratio": -0.5182665586471558, "logits/chosen": -0.1786392629146576, "logits/rejected": -0.14381210505962372, "logps/chosen": -1.9538408517837524, "logps/rejected": -2.545252799987793, "loss": 1.1341, "nll_loss": 1.0822815895080566, "rewards/accuracies": 0.75, "rewards/chosen": -0.19538408517837524, "rewards/margins": 0.059141188859939575, "rewards/rejected": -0.2545252740383148, "step": 794 }, { "epoch": 2.1765913757700206, "grad_norm": 2.8893539905548096, "learning_rate": 8.910958904109589e-07, "log_odds_chosen": 0.5696443319320679, "log_odds_ratio": -0.6285045146942139, "logits/chosen": -0.05685832351446152, "logits/rejected": -0.048186324536800385, "logps/chosen": -2.4575400352478027, "logps/rejected": -2.9724273681640625, "loss": 1.2359, "nll_loss": 1.173032522201538, "rewards/accuracies": 0.75, "rewards/chosen": -0.24575400352478027, "rewards/margins": 0.051488716155290604, "rewards/rejected": -0.2972427308559418, "step": 795 }, { "epoch": 2.1793292265571527, "grad_norm": 3.126704216003418, "learning_rate": 8.909589041095889e-07, "log_odds_chosen": -0.07627225667238235, "log_odds_ratio": -0.8075413107872009, "logits/chosen": -0.2530326247215271, "logits/rejected": -0.2820017635822296, "logps/chosen": -2.219794511795044, "logps/rejected": -2.115473747253418, "loss": 1.2154, "nll_loss": 1.1346217393875122, "rewards/accuracies": 0.625, "rewards/chosen": -0.22197945415973663, "rewards/margins": -0.01043207198381424, "rewards/rejected": -0.211547389626503, "step": 796 }, { "epoch": 2.1820670773442847, "grad_norm": 2.9084370136260986, "learning_rate": 8.908219178082191e-07, "log_odds_chosen": 0.21346238255500793, "log_odds_ratio": -0.7396453619003296, "logits/chosen": 0.042782336473464966, "logits/rejected": 0.08050283044576645, "logps/chosen": -2.2554945945739746, "logps/rejected": -2.4804656505584717, "loss": 1.179, "nll_loss": 1.1050597429275513, "rewards/accuracies": 0.625, "rewards/chosen": -0.22554948925971985, "rewards/margins": 0.02249707467854023, "rewards/rejected": -0.24804654717445374, "step": 797 }, { "epoch": 2.184804928131417, "grad_norm": 2.645080089569092, "learning_rate": 8.906849315068493e-07, "log_odds_chosen": 0.7401279211044312, "log_odds_ratio": -0.5297625660896301, "logits/chosen": -0.11076535284519196, "logits/rejected": -0.1770978569984436, "logps/chosen": -1.9243777990341187, "logps/rejected": -2.4798073768615723, "loss": 1.1767, "nll_loss": 1.1236923933029175, "rewards/accuracies": 0.75, "rewards/chosen": -0.1924377977848053, "rewards/margins": 0.05554294213652611, "rewards/rejected": -0.24798071384429932, "step": 798 }, { "epoch": 2.187542778918549, "grad_norm": 3.6962296962738037, "learning_rate": 8.905479452054793e-07, "log_odds_chosen": -0.377600759267807, "log_odds_ratio": -1.007590413093567, "logits/chosen": -0.034272752702236176, "logits/rejected": -6.58608041703701e-05, "logps/chosen": -3.117779016494751, "logps/rejected": -2.7631397247314453, "loss": 1.192, "nll_loss": 1.0912753343582153, "rewards/accuracies": 0.375, "rewards/chosen": -0.31177788972854614, "rewards/margins": -0.035463906824588776, "rewards/rejected": -0.27631399035453796, "step": 799 }, { "epoch": 2.190280629705681, "grad_norm": 3.7170569896698, "learning_rate": 8.904109589041095e-07, "log_odds_chosen": -0.043003201484680176, "log_odds_ratio": -0.8439248204231262, "logits/chosen": -0.114616759121418, "logits/rejected": -0.15528468787670135, "logps/chosen": -2.7473177909851074, "logps/rejected": -2.6821866035461426, "loss": 1.1944, "nll_loss": 1.1100564002990723, "rewards/accuracies": 0.5, "rewards/chosen": -0.2747317850589752, "rewards/margins": -0.006513124331831932, "rewards/rejected": -0.26821866631507874, "step": 800 }, { "epoch": 2.193018480492813, "grad_norm": 2.6991066932678223, "learning_rate": 8.902739726027398e-07, "log_odds_chosen": 0.6641940474510193, "log_odds_ratio": -0.44880861043930054, "logits/chosen": -0.08476556837558746, "logits/rejected": -0.08102284371852875, "logps/chosen": -1.8592751026153564, "logps/rejected": -2.437304973602295, "loss": 1.0929, "nll_loss": 1.0480506420135498, "rewards/accuracies": 0.75, "rewards/chosen": -0.18592749536037445, "rewards/margins": 0.05780297517776489, "rewards/rejected": -0.24373047053813934, "step": 801 }, { "epoch": 2.195756331279945, "grad_norm": 3.0148823261260986, "learning_rate": 8.901369863013697e-07, "log_odds_chosen": 0.784351110458374, "log_odds_ratio": -0.4576268494129181, "logits/chosen": -0.017163816839456558, "logits/rejected": -0.03407129645347595, "logps/chosen": -2.2069809436798096, "logps/rejected": -2.896501302719116, "loss": 1.1333, "nll_loss": 1.0875529050827026, "rewards/accuracies": 0.875, "rewards/chosen": -0.22069808840751648, "rewards/margins": 0.0689520388841629, "rewards/rejected": -0.2896501421928406, "step": 802 }, { "epoch": 2.198494182067077, "grad_norm": 4.498048305511475, "learning_rate": 8.9e-07, "log_odds_chosen": -0.7472878694534302, "log_odds_ratio": -1.4992998838424683, "logits/chosen": -0.05484647676348686, "logits/rejected": -0.0196845680475235, "logps/chosen": -3.3062362670898438, "logps/rejected": -2.4465296268463135, "loss": 1.239, "nll_loss": 1.0891178846359253, "rewards/accuracies": 0.375, "rewards/chosen": -0.33062368631362915, "rewards/margins": -0.0859706699848175, "rewards/rejected": -0.24465295672416687, "step": 803 }, { "epoch": 2.2012320328542097, "grad_norm": 3.401803731918335, "learning_rate": 8.898630136986302e-07, "log_odds_chosen": 0.9626626968383789, "log_odds_ratio": -0.5978009700775146, "logits/chosen": -0.11865255981683731, "logits/rejected": -0.12393677979707718, "logps/chosen": -2.6902568340301514, "logps/rejected": -3.5899782180786133, "loss": 1.1793, "nll_loss": 1.1194968223571777, "rewards/accuracies": 0.625, "rewards/chosen": -0.26902565360069275, "rewards/margins": 0.08997216075658798, "rewards/rejected": -0.3589978516101837, "step": 804 }, { "epoch": 2.2039698836413417, "grad_norm": 2.6605305671691895, "learning_rate": 8.897260273972603e-07, "log_odds_chosen": 0.2828570604324341, "log_odds_ratio": -0.6002785563468933, "logits/chosen": -0.17965000867843628, "logits/rejected": -0.22950124740600586, "logps/chosen": -1.89691960811615, "logps/rejected": -2.1168651580810547, "loss": 1.1305, "nll_loss": 1.0704271793365479, "rewards/accuracies": 0.625, "rewards/chosen": -0.189691960811615, "rewards/margins": 0.021994566544890404, "rewards/rejected": -0.21168652176856995, "step": 805 }, { "epoch": 2.206707734428474, "grad_norm": 4.610719680786133, "learning_rate": 8.895890410958904e-07, "log_odds_chosen": -0.3420208692550659, "log_odds_ratio": -1.1066651344299316, "logits/chosen": -0.0280755702406168, "logits/rejected": -0.034884944558143616, "logps/chosen": -3.550159454345703, "logps/rejected": -3.196719169616699, "loss": 1.2574, "nll_loss": 1.1466915607452393, "rewards/accuracies": 0.375, "rewards/chosen": -0.35501593351364136, "rewards/margins": -0.035344019532203674, "rewards/rejected": -0.3196719288825989, "step": 806 }, { "epoch": 2.209445585215606, "grad_norm": 3.0658187866210938, "learning_rate": 8.894520547945205e-07, "log_odds_chosen": 0.4817509651184082, "log_odds_ratio": -0.5957956314086914, "logits/chosen": -0.001811787486076355, "logits/rejected": -0.04720892757177353, "logps/chosen": -2.315959930419922, "logps/rejected": -2.749577283859253, "loss": 1.159, "nll_loss": 1.0993989706039429, "rewards/accuracies": 0.625, "rewards/chosen": -0.23159600794315338, "rewards/margins": 0.043361734598875046, "rewards/rejected": -0.2749577462673187, "step": 807 }, { "epoch": 2.212183436002738, "grad_norm": 3.0123393535614014, "learning_rate": 8.893150684931507e-07, "log_odds_chosen": 0.42018362879753113, "log_odds_ratio": -0.6282203197479248, "logits/chosen": 0.010532278567552567, "logits/rejected": 0.04581481218338013, "logps/chosen": -2.0277817249298096, "logps/rejected": -2.426807403564453, "loss": 1.1267, "nll_loss": 1.0638954639434814, "rewards/accuracies": 0.625, "rewards/chosen": -0.2027781754732132, "rewards/margins": 0.03990257903933525, "rewards/rejected": -0.24268075823783875, "step": 808 }, { "epoch": 2.21492128678987, "grad_norm": 3.1748721599578857, "learning_rate": 8.891780821917809e-07, "log_odds_chosen": 0.02725920081138611, "log_odds_ratio": -0.7443253993988037, "logits/chosen": 0.04461333528161049, "logits/rejected": 0.04683303087949753, "logps/chosen": -2.497424364089966, "logps/rejected": -2.4800162315368652, "loss": 1.0858, "nll_loss": 1.0113601684570312, "rewards/accuracies": 0.5, "rewards/chosen": -0.24974243342876434, "rewards/margins": -0.0017407946288585663, "rewards/rejected": -0.24800162017345428, "step": 809 }, { "epoch": 2.217659137577002, "grad_norm": 3.1703007221221924, "learning_rate": 8.890410958904109e-07, "log_odds_chosen": 0.11346644163131714, "log_odds_ratio": -0.7049777507781982, "logits/chosen": -0.13080370426177979, "logits/rejected": -0.13654136657714844, "logps/chosen": -2.472599506378174, "logps/rejected": -2.5609052181243896, "loss": 1.1112, "nll_loss": 1.0407404899597168, "rewards/accuracies": 0.5, "rewards/chosen": -0.2472599446773529, "rewards/margins": 0.008830562233924866, "rewards/rejected": -0.25609052181243896, "step": 810 }, { "epoch": 2.220396988364134, "grad_norm": 3.5986950397491455, "learning_rate": 8.889041095890411e-07, "log_odds_chosen": 0.6696658730506897, "log_odds_ratio": -0.6853468418121338, "logits/chosen": 0.014124780893325806, "logits/rejected": 0.060321927070617676, "logps/chosen": -2.710249423980713, "logps/rejected": -3.3364953994750977, "loss": 1.0864, "nll_loss": 1.0178165435791016, "rewards/accuracies": 0.75, "rewards/chosen": -0.2710249423980713, "rewards/margins": 0.06262461841106415, "rewards/rejected": -0.33364954590797424, "step": 811 }, { "epoch": 2.223134839151266, "grad_norm": 3.829296350479126, "learning_rate": 8.887671232876713e-07, "log_odds_chosen": -0.5166419744491577, "log_odds_ratio": -1.1819274425506592, "logits/chosen": -0.1770438551902771, "logits/rejected": -0.2128215730190277, "logps/chosen": -2.96475887298584, "logps/rejected": -2.4228272438049316, "loss": 1.2508, "nll_loss": 1.1326305866241455, "rewards/accuracies": 0.5, "rewards/chosen": -0.29647591710090637, "rewards/margins": -0.054193172603845596, "rewards/rejected": -0.24228274822235107, "step": 812 }, { "epoch": 2.2258726899383983, "grad_norm": 3.764859199523926, "learning_rate": 8.886301369863013e-07, "log_odds_chosen": 0.20651525259017944, "log_odds_ratio": -0.8733446598052979, "logits/chosen": -0.02836402878165245, "logits/rejected": -0.014894828200340271, "logps/chosen": -3.0869319438934326, "logps/rejected": -3.2575037479400635, "loss": 1.1908, "nll_loss": 1.1034334897994995, "rewards/accuracies": 0.625, "rewards/chosen": -0.30869320034980774, "rewards/margins": 0.017057154327630997, "rewards/rejected": -0.3257503807544708, "step": 813 }, { "epoch": 2.2286105407255303, "grad_norm": 3.089487314224243, "learning_rate": 8.884931506849315e-07, "log_odds_chosen": -0.05193805694580078, "log_odds_ratio": -0.9369155168533325, "logits/chosen": -0.27058157324790955, "logits/rejected": -0.2857980728149414, "logps/chosen": -2.6777329444885254, "logps/rejected": -2.631554126739502, "loss": 1.2398, "nll_loss": 1.1460895538330078, "rewards/accuracies": 0.375, "rewards/chosen": -0.2677733302116394, "rewards/margins": -0.004617897793650627, "rewards/rejected": -0.26315540075302124, "step": 814 }, { "epoch": 2.2313483915126624, "grad_norm": 3.8022348880767822, "learning_rate": 8.883561643835617e-07, "log_odds_chosen": -0.42130815982818604, "log_odds_ratio": -1.024923324584961, "logits/chosen": -0.1082189530134201, "logits/rejected": -0.10319085419178009, "logps/chosen": -2.6799612045288086, "logps/rejected": -2.2631325721740723, "loss": 1.1035, "nll_loss": 1.0010427236557007, "rewards/accuracies": 0.375, "rewards/chosen": -0.2679961323738098, "rewards/margins": -0.04168283939361572, "rewards/rejected": -0.2263132929801941, "step": 815 }, { "epoch": 2.2340862422997945, "grad_norm": 2.954005241394043, "learning_rate": 8.882191780821917e-07, "log_odds_chosen": 0.7017017006874084, "log_odds_ratio": -0.516768217086792, "logits/chosen": -0.026317287236452103, "logits/rejected": -0.030599042773246765, "logps/chosen": -1.846044659614563, "logps/rejected": -2.483886957168579, "loss": 1.1099, "nll_loss": 1.05818772315979, "rewards/accuracies": 0.75, "rewards/chosen": -0.1846044659614563, "rewards/margins": 0.06378420442342758, "rewards/rejected": -0.24838867783546448, "step": 816 }, { "epoch": 2.236824093086927, "grad_norm": 3.8606958389282227, "learning_rate": 8.880821917808219e-07, "log_odds_chosen": 0.28968846797943115, "log_odds_ratio": -0.807729184627533, "logits/chosen": -0.0013532750308513641, "logits/rejected": 0.03263978660106659, "logps/chosen": -2.476822853088379, "logps/rejected": -2.70878005027771, "loss": 1.1287, "nll_loss": 1.0479282140731812, "rewards/accuracies": 0.5, "rewards/chosen": -0.24768228828907013, "rewards/margins": 0.023195721209049225, "rewards/rejected": -0.27087801694869995, "step": 817 }, { "epoch": 2.239561943874059, "grad_norm": 3.8633906841278076, "learning_rate": 8.879452054794521e-07, "log_odds_chosen": -0.16268745064735413, "log_odds_ratio": -0.8291966915130615, "logits/chosen": 0.07338312268257141, "logits/rejected": 0.0747871920466423, "logps/chosen": -3.2012414932250977, "logps/rejected": -3.0331153869628906, "loss": 1.0596, "nll_loss": 0.97669517993927, "rewards/accuracies": 0.5, "rewards/chosen": -0.32012417912483215, "rewards/margins": -0.016812598332762718, "rewards/rejected": -0.3033115565776825, "step": 818 }, { "epoch": 2.242299794661191, "grad_norm": 3.2791502475738525, "learning_rate": 8.878082191780822e-07, "log_odds_chosen": 0.12558801472187042, "log_odds_ratio": -0.7820916175842285, "logits/chosen": -0.14200225472450256, "logits/rejected": -0.17092737555503845, "logps/chosen": -2.2120048999786377, "logps/rejected": -2.2965786457061768, "loss": 1.1722, "nll_loss": 1.0939863920211792, "rewards/accuracies": 0.625, "rewards/chosen": -0.22120049595832825, "rewards/margins": 0.00845737848430872, "rewards/rejected": -0.2296578586101532, "step": 819 }, { "epoch": 2.245037645448323, "grad_norm": 3.167233467102051, "learning_rate": 8.876712328767123e-07, "log_odds_chosen": 0.09707838296890259, "log_odds_ratio": -0.7740880250930786, "logits/chosen": -0.026340655982494354, "logits/rejected": -0.02461083047091961, "logps/chosen": -2.2793655395507812, "logps/rejected": -2.3779802322387695, "loss": 1.0647, "nll_loss": 0.9873123168945312, "rewards/accuracies": 0.625, "rewards/chosen": -0.22793656587600708, "rewards/margins": 0.009861474856734276, "rewards/rejected": -0.2377980351448059, "step": 820 }, { "epoch": 2.2477754962354553, "grad_norm": 2.63383412361145, "learning_rate": 8.875342465753424e-07, "log_odds_chosen": 0.38868117332458496, "log_odds_ratio": -0.5601668357849121, "logits/chosen": -0.14079104363918304, "logits/rejected": -0.16689054667949677, "logps/chosen": -2.010329246520996, "logps/rejected": -2.3174948692321777, "loss": 1.169, "nll_loss": 1.1130155324935913, "rewards/accuracies": 0.625, "rewards/chosen": -0.20103290677070618, "rewards/margins": 0.030716586858034134, "rewards/rejected": -0.2317495048046112, "step": 821 }, { "epoch": 2.2505133470225873, "grad_norm": 2.9282870292663574, "learning_rate": 8.873972602739726e-07, "log_odds_chosen": -0.014121830463409424, "log_odds_ratio": -0.7924070954322815, "logits/chosen": -0.007431972771883011, "logits/rejected": 0.0011242777109146118, "logps/chosen": -2.3942148685455322, "logps/rejected": -2.3593287467956543, "loss": 1.1344, "nll_loss": 1.0551836490631104, "rewards/accuracies": 0.5, "rewards/chosen": -0.23942148685455322, "rewards/margins": -0.0034886114299297333, "rewards/rejected": -0.23593288660049438, "step": 822 }, { "epoch": 2.2532511978097194, "grad_norm": 2.811800718307495, "learning_rate": 8.872602739726027e-07, "log_odds_chosen": 0.1797575056552887, "log_odds_ratio": -0.7214372754096985, "logits/chosen": -0.18999744951725006, "logits/rejected": -0.28338539600372314, "logps/chosen": -2.04797625541687, "logps/rejected": -2.1612086296081543, "loss": 1.2454, "nll_loss": 1.173232913017273, "rewards/accuracies": 0.375, "rewards/chosen": -0.204797625541687, "rewards/margins": 0.01132325828075409, "rewards/rejected": -0.2161208689212799, "step": 823 }, { "epoch": 2.2559890485968515, "grad_norm": 2.8875718116760254, "learning_rate": 8.871232876712328e-07, "log_odds_chosen": 0.9226049780845642, "log_odds_ratio": -0.6643358469009399, "logits/chosen": -0.09065335988998413, "logits/rejected": -0.1434890627861023, "logps/chosen": -2.126288414001465, "logps/rejected": -3.034335136413574, "loss": 1.0981, "nll_loss": 1.0316863059997559, "rewards/accuracies": 0.5, "rewards/chosen": -0.2126288264989853, "rewards/margins": 0.09080466628074646, "rewards/rejected": -0.30343350768089294, "step": 824 }, { "epoch": 2.2587268993839835, "grad_norm": 2.8740780353546143, "learning_rate": 8.86986301369863e-07, "log_odds_chosen": 0.25882992148399353, "log_odds_ratio": -0.6254210472106934, "logits/chosen": -0.012084994465112686, "logits/rejected": -0.07298144698143005, "logps/chosen": -2.178101062774658, "logps/rejected": -2.3682796955108643, "loss": 1.0675, "nll_loss": 1.0049268007278442, "rewards/accuracies": 0.625, "rewards/chosen": -0.21781012415885925, "rewards/margins": 0.019017856568098068, "rewards/rejected": -0.23682796955108643, "step": 825 }, { "epoch": 2.2614647501711156, "grad_norm": 3.03136944770813, "learning_rate": 8.868493150684932e-07, "log_odds_chosen": 0.42854994535446167, "log_odds_ratio": -0.6498978137969971, "logits/chosen": -0.11792179197072983, "logits/rejected": -0.13871678709983826, "logps/chosen": -2.338104248046875, "logps/rejected": -2.715284824371338, "loss": 1.0564, "nll_loss": 0.9913983345031738, "rewards/accuracies": 0.625, "rewards/chosen": -0.2338104248046875, "rewards/margins": 0.03771805763244629, "rewards/rejected": -0.2715284824371338, "step": 826 }, { "epoch": 2.2642026009582477, "grad_norm": 3.0069305896759033, "learning_rate": 8.867123287671232e-07, "log_odds_chosen": 0.3401222229003906, "log_odds_ratio": -0.6321756839752197, "logits/chosen": -0.08397315442562103, "logits/rejected": -0.13163240253925323, "logps/chosen": -2.165842056274414, "logps/rejected": -2.438584327697754, "loss": 1.0982, "nll_loss": 1.0350275039672852, "rewards/accuracies": 0.625, "rewards/chosen": -0.2165842056274414, "rewards/margins": 0.027274230495095253, "rewards/rejected": -0.2438584417104721, "step": 827 }, { "epoch": 2.2669404517453797, "grad_norm": 3.2160680294036865, "learning_rate": 8.865753424657534e-07, "log_odds_chosen": 0.52853924036026, "log_odds_ratio": -0.7585675120353699, "logits/chosen": 0.07779590785503387, "logits/rejected": 0.032422058284282684, "logps/chosen": -2.54158878326416, "logps/rejected": -2.959615468978882, "loss": 1.2051, "nll_loss": 1.129217505455017, "rewards/accuracies": 0.625, "rewards/chosen": -0.2541588842868805, "rewards/margins": 0.04180266708135605, "rewards/rejected": -0.29596155881881714, "step": 828 }, { "epoch": 2.269678302532512, "grad_norm": 3.482823133468628, "learning_rate": 8.864383561643836e-07, "log_odds_chosen": 0.16297133266925812, "log_odds_ratio": -0.7184145450592041, "logits/chosen": -0.056742601096630096, "logits/rejected": -0.12018531560897827, "logps/chosen": -2.7106428146362305, "logps/rejected": -2.7805685997009277, "loss": 1.1403, "nll_loss": 1.068495273590088, "rewards/accuracies": 0.5, "rewards/chosen": -0.27106428146362305, "rewards/margins": 0.006992574781179428, "rewards/rejected": -0.2780568599700928, "step": 829 }, { "epoch": 2.272416153319644, "grad_norm": 3.5030031204223633, "learning_rate": 8.863013698630136e-07, "log_odds_chosen": -0.6677634716033936, "log_odds_ratio": -1.1622267961502075, "logits/chosen": -0.05069498345255852, "logits/rejected": -0.01883753389120102, "logps/chosen": -2.5093765258789062, "logps/rejected": -1.8877075910568237, "loss": 1.1668, "nll_loss": 1.050599217414856, "rewards/accuracies": 0.375, "rewards/chosen": -0.25093764066696167, "rewards/margins": -0.06216689199209213, "rewards/rejected": -0.18877077102661133, "step": 830 }, { "epoch": 2.2751540041067764, "grad_norm": 3.550884962081909, "learning_rate": 8.861643835616438e-07, "log_odds_chosen": 0.40965554118156433, "log_odds_ratio": -0.566737174987793, "logits/chosen": 0.009209472686052322, "logits/rejected": 0.03320056200027466, "logps/chosen": -2.345707654953003, "logps/rejected": -2.706982374191284, "loss": 1.0469, "nll_loss": 0.9902043342590332, "rewards/accuracies": 0.75, "rewards/chosen": -0.23457077145576477, "rewards/margins": 0.03612745925784111, "rewards/rejected": -0.270698219537735, "step": 831 }, { "epoch": 2.2778918548939084, "grad_norm": 2.84122896194458, "learning_rate": 8.86027397260274e-07, "log_odds_chosen": 0.8724208474159241, "log_odds_ratio": -0.7091245651245117, "logits/chosen": -0.09426289796829224, "logits/rejected": -0.12493152916431427, "logps/chosen": -2.153545618057251, "logps/rejected": -2.957335948944092, "loss": 1.0912, "nll_loss": 1.0202414989471436, "rewards/accuracies": 0.625, "rewards/chosen": -0.2153545618057251, "rewards/margins": 0.08037903904914856, "rewards/rejected": -0.29573360085487366, "step": 832 }, { "epoch": 2.2806297056810405, "grad_norm": 3.0260472297668457, "learning_rate": 8.858904109589041e-07, "log_odds_chosen": 0.023331046104431152, "log_odds_ratio": -0.8475092649459839, "logits/chosen": -0.060643333941698074, "logits/rejected": -0.07027947157621384, "logps/chosen": -1.8870271444320679, "logps/rejected": -1.8796532154083252, "loss": 1.0516, "nll_loss": 0.9668610095977783, "rewards/accuracies": 0.5, "rewards/chosen": -0.18870273232460022, "rewards/margins": -0.0007374025881290436, "rewards/rejected": -0.1879653036594391, "step": 833 }, { "epoch": 2.2833675564681726, "grad_norm": 4.395815849304199, "learning_rate": 8.857534246575342e-07, "log_odds_chosen": -0.20735108852386475, "log_odds_ratio": -0.9792666435241699, "logits/chosen": -0.037520937621593475, "logits/rejected": -0.03275274857878685, "logps/chosen": -2.8734493255615234, "logps/rejected": -2.5931906700134277, "loss": 1.1673, "nll_loss": 1.0694000720977783, "rewards/accuracies": 0.75, "rewards/chosen": -0.28734490275382996, "rewards/margins": -0.02802584320306778, "rewards/rejected": -0.2593190670013428, "step": 834 }, { "epoch": 2.2861054072553046, "grad_norm": 3.7165186405181885, "learning_rate": 8.856164383561644e-07, "log_odds_chosen": -0.21783597767353058, "log_odds_ratio": -1.0586256980895996, "logits/chosen": 0.0372498594224453, "logits/rejected": 0.0945950448513031, "logps/chosen": -3.2505626678466797, "logps/rejected": -3.0124425888061523, "loss": 1.1733, "nll_loss": 1.0674340724945068, "rewards/accuracies": 0.375, "rewards/chosen": -0.3250562846660614, "rewards/margins": -0.023812003433704376, "rewards/rejected": -0.30124425888061523, "step": 835 }, { "epoch": 2.2888432580424367, "grad_norm": 2.8562870025634766, "learning_rate": 8.854794520547945e-07, "log_odds_chosen": 1.7650401592254639, "log_odds_ratio": -0.46983540058135986, "logits/chosen": 0.09407006204128265, "logits/rejected": 0.061640892177820206, "logps/chosen": -2.2141458988189697, "logps/rejected": -3.9198241233825684, "loss": 1.1324, "nll_loss": 1.0854026079177856, "rewards/accuracies": 0.875, "rewards/chosen": -0.22141459584236145, "rewards/margins": 0.1705678403377533, "rewards/rejected": -0.39198240637779236, "step": 836 }, { "epoch": 2.291581108829569, "grad_norm": 2.746575355529785, "learning_rate": 8.853424657534246e-07, "log_odds_chosen": 0.48125988245010376, "log_odds_ratio": -0.5691766738891602, "logits/chosen": -0.04161033779382706, "logits/rejected": -0.09618803858757019, "logps/chosen": -1.9276418685913086, "logps/rejected": -2.32816743850708, "loss": 1.1706, "nll_loss": 1.113709568977356, "rewards/accuracies": 0.75, "rewards/chosen": -0.19276419281959534, "rewards/margins": 0.04005255550146103, "rewards/rejected": -0.23281675577163696, "step": 837 }, { "epoch": 2.294318959616701, "grad_norm": 3.8068037033081055, "learning_rate": 8.852054794520547e-07, "log_odds_chosen": -0.31065863370895386, "log_odds_ratio": -1.0646238327026367, "logits/chosen": -0.035786524415016174, "logits/rejected": -0.033585138618946075, "logps/chosen": -3.035895347595215, "logps/rejected": -2.6788392066955566, "loss": 1.1416, "nll_loss": 1.0350905656814575, "rewards/accuracies": 0.625, "rewards/chosen": -0.3035895526409149, "rewards/margins": -0.03570561856031418, "rewards/rejected": -0.26788392663002014, "step": 838 }, { "epoch": 2.297056810403833, "grad_norm": 3.0735161304473877, "learning_rate": 8.850684931506849e-07, "log_odds_chosen": -0.11694298684597015, "log_odds_ratio": -0.7940464019775391, "logits/chosen": -0.09038034081459045, "logits/rejected": -0.0498211532831192, "logps/chosen": -2.3978395462036133, "logps/rejected": -2.281843423843384, "loss": 1.1051, "nll_loss": 1.0256534814834595, "rewards/accuracies": 0.5, "rewards/chosen": -0.23978395760059357, "rewards/margins": -0.011599601246416569, "rewards/rejected": -0.22818434238433838, "step": 839 }, { "epoch": 2.299794661190965, "grad_norm": 4.3040032386779785, "learning_rate": 8.849315068493151e-07, "log_odds_chosen": -0.16190162301063538, "log_odds_ratio": -1.002781629562378, "logits/chosen": 0.06926549971103668, "logits/rejected": 0.053422361612319946, "logps/chosen": -3.0172476768493652, "logps/rejected": -2.8156511783599854, "loss": 1.2495, "nll_loss": 1.149227499961853, "rewards/accuracies": 0.75, "rewards/chosen": -0.30172479152679443, "rewards/margins": -0.020159656181931496, "rewards/rejected": -0.2815651297569275, "step": 840 }, { "epoch": 2.302532511978097, "grad_norm": 2.941117286682129, "learning_rate": 8.847945205479451e-07, "log_odds_chosen": 0.21872544288635254, "log_odds_ratio": -0.7910953760147095, "logits/chosen": -0.0851401761174202, "logits/rejected": -0.15342582762241364, "logps/chosen": -2.6032233238220215, "logps/rejected": -2.7979631423950195, "loss": 1.1379, "nll_loss": 1.058773159980774, "rewards/accuracies": 0.625, "rewards/chosen": -0.26032233238220215, "rewards/margins": 0.0194739680737257, "rewards/rejected": -0.279796302318573, "step": 841 }, { "epoch": 2.305270362765229, "grad_norm": 4.110260486602783, "learning_rate": 8.846575342465753e-07, "log_odds_chosen": 0.11902526021003723, "log_odds_ratio": -0.7863740921020508, "logits/chosen": 0.10941161215305328, "logits/rejected": 0.13179358839988708, "logps/chosen": -2.3508710861206055, "logps/rejected": -2.3983328342437744, "loss": 1.0016, "nll_loss": 0.9230024218559265, "rewards/accuracies": 0.875, "rewards/chosen": -0.23508711159229279, "rewards/margins": 0.004746163263916969, "rewards/rejected": -0.2398332804441452, "step": 842 }, { "epoch": 2.3080082135523616, "grad_norm": 3.273195743560791, "learning_rate": 8.845205479452055e-07, "log_odds_chosen": 0.18521150946617126, "log_odds_ratio": -0.8244837522506714, "logits/chosen": 0.1713552623987198, "logits/rejected": 0.11741187423467636, "logps/chosen": -2.3130948543548584, "logps/rejected": -2.486508846282959, "loss": 1.1079, "nll_loss": 1.025402307510376, "rewards/accuracies": 0.375, "rewards/chosen": -0.23130948841571808, "rewards/margins": 0.017341386526823044, "rewards/rejected": -0.24865087866783142, "step": 843 }, { "epoch": 2.3107460643394937, "grad_norm": 4.328063011169434, "learning_rate": 8.843835616438355e-07, "log_odds_chosen": -0.27246028184890747, "log_odds_ratio": -0.9979090094566345, "logits/chosen": 0.06589321792125702, "logits/rejected": 0.06450136750936508, "logps/chosen": -2.9777207374572754, "logps/rejected": -2.6569981575012207, "loss": 1.0377, "nll_loss": 0.9379422068595886, "rewards/accuracies": 0.5, "rewards/chosen": -0.297772079706192, "rewards/margins": -0.032072246074676514, "rewards/rejected": -0.2656998634338379, "step": 844 }, { "epoch": 2.3134839151266258, "grad_norm": 3.71738338470459, "learning_rate": 8.842465753424657e-07, "log_odds_chosen": -0.3575710654258728, "log_odds_ratio": -1.0644055604934692, "logits/chosen": 0.0952615737915039, "logits/rejected": 0.1254691481590271, "logps/chosen": -3.0903632640838623, "logps/rejected": -2.723086357116699, "loss": 1.073, "nll_loss": 0.9665166139602661, "rewards/accuracies": 0.25, "rewards/chosen": -0.3090363144874573, "rewards/margins": -0.03672768175601959, "rewards/rejected": -0.2723086476325989, "step": 845 }, { "epoch": 2.316221765913758, "grad_norm": 2.8223531246185303, "learning_rate": 8.841095890410959e-07, "log_odds_chosen": 0.5525878667831421, "log_odds_ratio": -0.506629467010498, "logits/chosen": 0.01874888874590397, "logits/rejected": -0.028834577649831772, "logps/chosen": -1.9910571575164795, "logps/rejected": -2.471996307373047, "loss": 1.2064, "nll_loss": 1.1557718515396118, "rewards/accuracies": 0.875, "rewards/chosen": -0.19910572469234467, "rewards/margins": 0.048093900084495544, "rewards/rejected": -0.2471996247768402, "step": 846 }, { "epoch": 2.31895961670089, "grad_norm": 3.61508846282959, "learning_rate": 8.83972602739726e-07, "log_odds_chosen": 0.29793959856033325, "log_odds_ratio": -0.8874819278717041, "logits/chosen": -0.02027691714465618, "logits/rejected": 0.0019001327455043793, "logps/chosen": -2.853806734085083, "logps/rejected": -3.142331123352051, "loss": 1.0856, "nll_loss": 0.9968387484550476, "rewards/accuracies": 0.375, "rewards/chosen": -0.28538069128990173, "rewards/margins": 0.028852425515651703, "rewards/rejected": -0.31423312425613403, "step": 847 }, { "epoch": 2.321697467488022, "grad_norm": 3.554114818572998, "learning_rate": 8.838356164383561e-07, "log_odds_chosen": -0.04299403727054596, "log_odds_ratio": -0.7791085839271545, "logits/chosen": 0.1343730092048645, "logits/rejected": 0.1197299063205719, "logps/chosen": -2.682084798812866, "logps/rejected": -2.6462557315826416, "loss": 1.1949, "nll_loss": 1.1170061826705933, "rewards/accuracies": 0.625, "rewards/chosen": -0.26820850372314453, "rewards/margins": -0.0035829171538352966, "rewards/rejected": -0.26462554931640625, "step": 848 }, { "epoch": 2.324435318275154, "grad_norm": 2.495490074157715, "learning_rate": 8.836986301369863e-07, "log_odds_chosen": 0.6059593558311462, "log_odds_ratio": -0.4710427522659302, "logits/chosen": 0.05028906464576721, "logits/rejected": -0.05744708329439163, "logps/chosen": -1.7502570152282715, "logps/rejected": -2.235710620880127, "loss": 1.1353, "nll_loss": 1.0881531238555908, "rewards/accuracies": 0.875, "rewards/chosen": -0.17502571642398834, "rewards/margins": 0.04854534566402435, "rewards/rejected": -0.2235710620880127, "step": 849 }, { "epoch": 2.327173169062286, "grad_norm": 3.5863990783691406, "learning_rate": 8.835616438356164e-07, "log_odds_chosen": 0.8411130309104919, "log_odds_ratio": -0.5766454339027405, "logits/chosen": -0.014211617410182953, "logits/rejected": -0.03961991146206856, "logps/chosen": -2.2961316108703613, "logps/rejected": -3.0888848304748535, "loss": 1.0617, "nll_loss": 1.0040128231048584, "rewards/accuracies": 0.75, "rewards/chosen": -0.22961315512657166, "rewards/margins": 0.07927534729242325, "rewards/rejected": -0.3088884949684143, "step": 850 }, { "epoch": 2.329911019849418, "grad_norm": 3.319711446762085, "learning_rate": 8.834246575342465e-07, "log_odds_chosen": 0.3241267800331116, "log_odds_ratio": -0.67075514793396, "logits/chosen": 0.0564248226583004, "logits/rejected": 0.014690466225147247, "logps/chosen": -2.2876734733581543, "logps/rejected": -2.5582451820373535, "loss": 1.0484, "nll_loss": 0.9813693165779114, "rewards/accuracies": 0.75, "rewards/chosen": -0.22876735031604767, "rewards/margins": 0.027057191357016563, "rewards/rejected": -0.2558245360851288, "step": 851 }, { "epoch": 2.3326488706365502, "grad_norm": 3.7681217193603516, "learning_rate": 8.832876712328766e-07, "log_odds_chosen": 0.4465683102607727, "log_odds_ratio": -0.9410440325737, "logits/chosen": 0.23511117696762085, "logits/rejected": 0.2326093316078186, "logps/chosen": -3.2341554164886475, "logps/rejected": -3.630507230758667, "loss": 1.0907, "nll_loss": 0.9965536594390869, "rewards/accuracies": 0.75, "rewards/chosen": -0.3234155476093292, "rewards/margins": 0.03963516652584076, "rewards/rejected": -0.3630507290363312, "step": 852 }, { "epoch": 2.3353867214236823, "grad_norm": 3.623485803604126, "learning_rate": 8.831506849315068e-07, "log_odds_chosen": 0.411068856716156, "log_odds_ratio": -0.6067082285881042, "logits/chosen": 0.11842188984155655, "logits/rejected": 0.19209080934524536, "logps/chosen": -2.579008102416992, "logps/rejected": -2.961578607559204, "loss": 1.008, "nll_loss": 0.9473593235015869, "rewards/accuracies": 0.625, "rewards/chosen": -0.25790083408355713, "rewards/margins": 0.03825703635811806, "rewards/rejected": -0.2961578667163849, "step": 853 }, { "epoch": 2.3381245722108144, "grad_norm": 3.633671760559082, "learning_rate": 8.83013698630137e-07, "log_odds_chosen": 0.5375657677650452, "log_odds_ratio": -0.5771439075469971, "logits/chosen": 0.14813834428787231, "logits/rejected": 0.16168031096458435, "logps/chosen": -2.662783622741699, "logps/rejected": -3.169250965118408, "loss": 0.9911, "nll_loss": 0.9333792924880981, "rewards/accuracies": 0.625, "rewards/chosen": -0.26627832651138306, "rewards/margins": 0.050646767020225525, "rewards/rejected": -0.3169251084327698, "step": 854 }, { "epoch": 2.3408624229979464, "grad_norm": 3.310088872909546, "learning_rate": 8.82876712328767e-07, "log_odds_chosen": 0.06458419561386108, "log_odds_ratio": -0.8038425445556641, "logits/chosen": 0.2934330403804779, "logits/rejected": 0.2913511395454407, "logps/chosen": -2.7890913486480713, "logps/rejected": -2.7971348762512207, "loss": 0.9812, "nll_loss": 0.9007982611656189, "rewards/accuracies": 0.875, "rewards/chosen": -0.2789091467857361, "rewards/margins": 0.0008043423295021057, "rewards/rejected": -0.2797134816646576, "step": 855 }, { "epoch": 2.3436002737850785, "grad_norm": 2.790966033935547, "learning_rate": 8.827397260273972e-07, "log_odds_chosen": 0.4114106297492981, "log_odds_ratio": -0.6524879932403564, "logits/chosen": -0.08197709172964096, "logits/rejected": -0.1305331587791443, "logps/chosen": -1.955368995666504, "logps/rejected": -2.291815996170044, "loss": 1.1593, "nll_loss": 1.0940543413162231, "rewards/accuracies": 0.75, "rewards/chosen": -0.19553689658641815, "rewards/margins": 0.03364470601081848, "rewards/rejected": -0.22918160259723663, "step": 856 }, { "epoch": 2.3463381245722106, "grad_norm": 2.5690255165100098, "learning_rate": 8.826027397260274e-07, "log_odds_chosen": 0.5792591571807861, "log_odds_ratio": -0.47561025619506836, "logits/chosen": 0.024170339107513428, "logits/rejected": -0.0002828165888786316, "logps/chosen": -1.633898377418518, "logps/rejected": -2.12878155708313, "loss": 1.1078, "nll_loss": 1.0601892471313477, "rewards/accuracies": 0.875, "rewards/chosen": -0.16338983178138733, "rewards/margins": 0.04948830232024193, "rewards/rejected": -0.21287815272808075, "step": 857 }, { "epoch": 2.349075975359343, "grad_norm": 2.8611929416656494, "learning_rate": 8.824657534246574e-07, "log_odds_chosen": 0.13620871305465698, "log_odds_ratio": -0.7406310439109802, "logits/chosen": 0.16006115078926086, "logits/rejected": 0.11546014994382858, "logps/chosen": -2.2702531814575195, "logps/rejected": -2.3502962589263916, "loss": 1.0839, "nll_loss": 1.0098049640655518, "rewards/accuracies": 0.5, "rewards/chosen": -0.2270253300666809, "rewards/margins": 0.008004317060112953, "rewards/rejected": -0.23502963781356812, "step": 858 }, { "epoch": 2.351813826146475, "grad_norm": 3.8283979892730713, "learning_rate": 8.823287671232876e-07, "log_odds_chosen": 0.28382235765457153, "log_odds_ratio": -0.6054968237876892, "logits/chosen": 0.3261093199253082, "logits/rejected": 0.37404200434684753, "logps/chosen": -2.602365493774414, "logps/rejected": -2.8525915145874023, "loss": 0.9197, "nll_loss": 0.8591703772544861, "rewards/accuracies": 0.75, "rewards/chosen": -0.26023656129837036, "rewards/margins": 0.02502262592315674, "rewards/rejected": -0.2852591872215271, "step": 859 }, { "epoch": 2.3545516769336072, "grad_norm": 2.868952751159668, "learning_rate": 8.821917808219178e-07, "log_odds_chosen": 0.24383273720741272, "log_odds_ratio": -0.6473767757415771, "logits/chosen": 0.14087994396686554, "logits/rejected": 0.11153270304203033, "logps/chosen": -2.262768507003784, "logps/rejected": -2.477982521057129, "loss": 1.1055, "nll_loss": 1.040794849395752, "rewards/accuracies": 0.75, "rewards/chosen": -0.22627687454223633, "rewards/margins": 0.021521396934986115, "rewards/rejected": -0.24779826402664185, "step": 860 }, { "epoch": 2.3572895277207393, "grad_norm": 3.3580119609832764, "learning_rate": 8.820547945205479e-07, "log_odds_chosen": 0.4787772297859192, "log_odds_ratio": -0.5902885794639587, "logits/chosen": 0.11498505622148514, "logits/rejected": 0.11655448377132416, "logps/chosen": -2.0081589221954346, "logps/rejected": -2.4446277618408203, "loss": 0.9859, "nll_loss": 0.9268215894699097, "rewards/accuracies": 0.875, "rewards/chosen": -0.20081588625907898, "rewards/margins": 0.04364689439535141, "rewards/rejected": -0.2444627583026886, "step": 861 }, { "epoch": 2.3600273785078714, "grad_norm": 3.290644884109497, "learning_rate": 8.81917808219178e-07, "log_odds_chosen": 0.20525093376636505, "log_odds_ratio": -0.8035702109336853, "logits/chosen": 0.09375511854887009, "logits/rejected": 0.08324673771858215, "logps/chosen": -2.4501876831054688, "logps/rejected": -2.5755226612091064, "loss": 1.1412, "nll_loss": 1.0608069896697998, "rewards/accuracies": 0.625, "rewards/chosen": -0.24501878023147583, "rewards/margins": 0.012533491477370262, "rewards/rejected": -0.25755226612091064, "step": 862 }, { "epoch": 2.3627652292950034, "grad_norm": 4.262570381164551, "learning_rate": 8.817808219178082e-07, "log_odds_chosen": 0.5683791637420654, "log_odds_ratio": -0.7724160552024841, "logits/chosen": 0.17658790946006775, "logits/rejected": 0.19554553925991058, "logps/chosen": -2.9574761390686035, "logps/rejected": -3.4832873344421387, "loss": 1.0206, "nll_loss": 0.9433826208114624, "rewards/accuracies": 0.625, "rewards/chosen": -0.29574763774871826, "rewards/margins": 0.052581094205379486, "rewards/rejected": -0.34832870960235596, "step": 863 }, { "epoch": 2.3655030800821355, "grad_norm": 3.34059476852417, "learning_rate": 8.816438356164383e-07, "log_odds_chosen": 0.48155128955841064, "log_odds_ratio": -0.608109176158905, "logits/chosen": 0.05794877931475639, "logits/rejected": 0.08768025040626526, "logps/chosen": -2.7626914978027344, "logps/rejected": -3.21414852142334, "loss": 1.0302, "nll_loss": 0.9693707227706909, "rewards/accuracies": 0.625, "rewards/chosen": -0.2762691378593445, "rewards/margins": 0.04514571651816368, "rewards/rejected": -0.32141488790512085, "step": 864 }, { "epoch": 2.3682409308692676, "grad_norm": 3.9186363220214844, "learning_rate": 8.815068493150684e-07, "log_odds_chosen": -0.5661150217056274, "log_odds_ratio": -1.173190712928772, "logits/chosen": 0.09483622759580612, "logits/rejected": 0.16311714053153992, "logps/chosen": -2.913856029510498, "logps/rejected": -2.3187994956970215, "loss": 1.043, "nll_loss": 0.9256914854049683, "rewards/accuracies": 0.375, "rewards/chosen": -0.29138562083244324, "rewards/margins": -0.059505660086870193, "rewards/rejected": -0.23187994956970215, "step": 865 }, { "epoch": 2.3709787816563996, "grad_norm": 2.8946280479431152, "learning_rate": 8.813698630136986e-07, "log_odds_chosen": 0.9129911065101624, "log_odds_ratio": -0.5204905867576599, "logits/chosen": -0.03628828376531601, "logits/rejected": -0.05822952836751938, "logps/chosen": -2.1555633544921875, "logps/rejected": -2.9918100833892822, "loss": 1.0133, "nll_loss": 0.9612076878547668, "rewards/accuracies": 0.75, "rewards/chosen": -0.21555635333061218, "rewards/margins": 0.08362466841936111, "rewards/rejected": -0.2991810441017151, "step": 866 }, { "epoch": 2.3737166324435317, "grad_norm": 3.7558534145355225, "learning_rate": 8.812328767123287e-07, "log_odds_chosen": -0.030117064714431763, "log_odds_ratio": -0.865446925163269, "logits/chosen": 0.17424426972866058, "logits/rejected": 0.14249160885810852, "logps/chosen": -3.1851656436920166, "logps/rejected": -3.115723133087158, "loss": 1.0798, "nll_loss": 0.9932261109352112, "rewards/accuracies": 0.625, "rewards/chosen": -0.3185165524482727, "rewards/margins": -0.006944254040718079, "rewards/rejected": -0.3115723133087158, "step": 867 }, { "epoch": 2.3764544832306638, "grad_norm": 3.557352066040039, "learning_rate": 8.810958904109589e-07, "log_odds_chosen": -0.3974801301956177, "log_odds_ratio": -1.1878249645233154, "logits/chosen": 0.23453912138938904, "logits/rejected": 0.2497951090335846, "logps/chosen": -3.182541847229004, "logps/rejected": -2.7630162239074707, "loss": 1.0356, "nll_loss": 0.9167689085006714, "rewards/accuracies": 0.375, "rewards/chosen": -0.31825417280197144, "rewards/margins": -0.041952524334192276, "rewards/rejected": -0.27630162239074707, "step": 868 }, { "epoch": 2.379192334017796, "grad_norm": 3.987880229949951, "learning_rate": 8.809589041095889e-07, "log_odds_chosen": -0.19038769602775574, "log_odds_ratio": -1.0891568660736084, "logits/chosen": 0.2644839286804199, "logits/rejected": 0.3436332046985626, "logps/chosen": -2.8038365840911865, "logps/rejected": -2.586205005645752, "loss": 0.9766, "nll_loss": 0.8677197694778442, "rewards/accuracies": 0.5, "rewards/chosen": -0.2803836464881897, "rewards/margins": -0.021763183176517487, "rewards/rejected": -0.2586204707622528, "step": 869 }, { "epoch": 2.3819301848049284, "grad_norm": 3.6597564220428467, "learning_rate": 8.808219178082191e-07, "log_odds_chosen": 0.07638421654701233, "log_odds_ratio": -0.9261085987091064, "logits/chosen": 0.1713334023952484, "logits/rejected": 0.11911475658416748, "logps/chosen": -3.0170586109161377, "logps/rejected": -3.0581822395324707, "loss": 1.0135, "nll_loss": 0.9209095239639282, "rewards/accuracies": 0.5, "rewards/chosen": -0.30170583724975586, "rewards/margins": 0.004112351685762405, "rewards/rejected": -0.30581820011138916, "step": 870 }, { "epoch": 2.3846680355920604, "grad_norm": 3.595989942550659, "learning_rate": 8.806849315068493e-07, "log_odds_chosen": 0.5459715127944946, "log_odds_ratio": -0.7031132578849792, "logits/chosen": 0.0001914091408252716, "logits/rejected": -0.12985770404338837, "logps/chosen": -2.1498026847839355, "logps/rejected": -2.5598232746124268, "loss": 1.0779, "nll_loss": 1.0075693130493164, "rewards/accuracies": 0.625, "rewards/chosen": -0.21498025953769684, "rewards/margins": 0.04100207984447479, "rewards/rejected": -0.25598233938217163, "step": 871 }, { "epoch": 2.3874058863791925, "grad_norm": 2.802014112472534, "learning_rate": 8.805479452054793e-07, "log_odds_chosen": 0.973041832447052, "log_odds_ratio": -0.43116283416748047, "logits/chosen": 0.006388702429831028, "logits/rejected": -0.09437574446201324, "logps/chosen": -1.7661874294281006, "logps/rejected": -2.6257219314575195, "loss": 1.0325, "nll_loss": 0.9894275069236755, "rewards/accuracies": 0.75, "rewards/chosen": -0.176618754863739, "rewards/margins": 0.08595345914363861, "rewards/rejected": -0.2625722289085388, "step": 872 }, { "epoch": 2.3901437371663246, "grad_norm": 2.9172251224517822, "learning_rate": 8.804109589041095e-07, "log_odds_chosen": -0.24562549591064453, "log_odds_ratio": -0.9055352807044983, "logits/chosen": 0.14542530477046967, "logits/rejected": 0.10584767907857895, "logps/chosen": -2.2065770626068115, "logps/rejected": -1.969979166984558, "loss": 1.0816, "nll_loss": 0.9910424947738647, "rewards/accuracies": 0.375, "rewards/chosen": -0.22065770626068115, "rewards/margins": -0.023659788072109222, "rewards/rejected": -0.19699791073799133, "step": 873 }, { "epoch": 2.3928815879534566, "grad_norm": 3.93560528755188, "learning_rate": 8.802739726027397e-07, "log_odds_chosen": -0.9759323596954346, "log_odds_ratio": -1.5795286893844604, "logits/chosen": 0.35942313075065613, "logits/rejected": 0.39116430282592773, "logps/chosen": -3.083061695098877, "logps/rejected": -2.08565092086792, "loss": 1.1949, "nll_loss": 1.0369378328323364, "rewards/accuracies": 0.5, "rewards/chosen": -0.30830615758895874, "rewards/margins": -0.09974106401205063, "rewards/rejected": -0.2085651010274887, "step": 874 }, { "epoch": 2.3956194387405887, "grad_norm": 3.683925151824951, "learning_rate": 8.801369863013698e-07, "log_odds_chosen": -0.33818233013153076, "log_odds_ratio": -1.0573400259017944, "logits/chosen": 0.05517825111746788, "logits/rejected": 0.0366705022752285, "logps/chosen": -2.7080633640289307, "logps/rejected": -2.3326730728149414, "loss": 1.2769, "nll_loss": 1.1711941957473755, "rewards/accuracies": 0.625, "rewards/chosen": -0.27080634236335754, "rewards/margins": -0.037539009004831314, "rewards/rejected": -0.23326733708381653, "step": 875 }, { "epoch": 2.3983572895277208, "grad_norm": 2.783698320388794, "learning_rate": 8.799999999999999e-07, "log_odds_chosen": 0.8095651865005493, "log_odds_ratio": -0.5148540139198303, "logits/chosen": 0.2482346147298813, "logits/rejected": 0.2154591828584671, "logps/chosen": -1.9743082523345947, "logps/rejected": -2.751774787902832, "loss": 1.0764, "nll_loss": 1.0248854160308838, "rewards/accuracies": 0.625, "rewards/chosen": -0.197430819272995, "rewards/margins": 0.0777466669678688, "rewards/rejected": -0.2751775085926056, "step": 876 }, { "epoch": 2.401095140314853, "grad_norm": 2.8689894676208496, "learning_rate": 8.798630136986301e-07, "log_odds_chosen": 0.3043902516365051, "log_odds_ratio": -0.6165245771408081, "logits/chosen": 0.1726846992969513, "logits/rejected": 0.16146142780780792, "logps/chosen": -2.1229982376098633, "logps/rejected": -2.358546257019043, "loss": 1.0939, "nll_loss": 1.032246470451355, "rewards/accuracies": 0.625, "rewards/chosen": -0.21229979395866394, "rewards/margins": 0.023554803803563118, "rewards/rejected": -0.2358546257019043, "step": 877 }, { "epoch": 2.403832991101985, "grad_norm": 3.131030321121216, "learning_rate": 8.797260273972602e-07, "log_odds_chosen": 0.006896764039993286, "log_odds_ratio": -0.8308282494544983, "logits/chosen": -0.09221766144037247, "logits/rejected": -0.13737404346466064, "logps/chosen": -3.0497303009033203, "logps/rejected": -3.0279335975646973, "loss": 1.1759, "nll_loss": 1.0928242206573486, "rewards/accuracies": 0.625, "rewards/chosen": -0.3049730360507965, "rewards/margins": -0.0021796803921461105, "rewards/rejected": -0.30279338359832764, "step": 878 }, { "epoch": 2.406570841889117, "grad_norm": 3.254847288131714, "learning_rate": 8.795890410958903e-07, "log_odds_chosen": -0.13124091923236847, "log_odds_ratio": -0.8262298703193665, "logits/chosen": 0.14670626819133759, "logits/rejected": 0.16319338977336884, "logps/chosen": -2.4239065647125244, "logps/rejected": -2.2867016792297363, "loss": 1.0249, "nll_loss": 0.9422717690467834, "rewards/accuracies": 0.75, "rewards/chosen": -0.24239066243171692, "rewards/margins": -0.013720503076910973, "rewards/rejected": -0.2286701500415802, "step": 879 }, { "epoch": 2.409308692676249, "grad_norm": 3.2906715869903564, "learning_rate": 8.794520547945205e-07, "log_odds_chosen": 0.20975938439369202, "log_odds_ratio": -0.6302886605262756, "logits/chosen": 0.17972885072231293, "logits/rejected": 0.19967684149742126, "logps/chosen": -2.9525928497314453, "logps/rejected": -3.13761568069458, "loss": 1.0634, "nll_loss": 1.0003596544265747, "rewards/accuracies": 0.625, "rewards/chosen": -0.2952592968940735, "rewards/margins": 0.01850230246782303, "rewards/rejected": -0.3137615919113159, "step": 880 }, { "epoch": 2.412046543463381, "grad_norm": 2.684401035308838, "learning_rate": 8.793150684931506e-07, "log_odds_chosen": 0.9299779534339905, "log_odds_ratio": -0.45545494556427, "logits/chosen": 0.0286832544952631, "logits/rejected": -0.04190443456172943, "logps/chosen": -2.004831314086914, "logps/rejected": -2.827427864074707, "loss": 1.0489, "nll_loss": 1.003373622894287, "rewards/accuracies": 0.75, "rewards/chosen": -0.20048314332962036, "rewards/margins": 0.0822596549987793, "rewards/rejected": -0.28274279832839966, "step": 881 }, { "epoch": 2.414784394250513, "grad_norm": 2.920118570327759, "learning_rate": 8.791780821917809e-07, "log_odds_chosen": 0.11909863352775574, "log_odds_ratio": -0.7737126350402832, "logits/chosen": 0.3983422517776489, "logits/rejected": 0.400488018989563, "logps/chosen": -2.178271770477295, "logps/rejected": -2.244127035140991, "loss": 1.0843, "nll_loss": 1.0069303512573242, "rewards/accuracies": 0.5, "rewards/chosen": -0.2178272008895874, "rewards/margins": 0.006585527211427689, "rewards/rejected": -0.2244127094745636, "step": 882 }, { "epoch": 2.4175222450376452, "grad_norm": 3.015420913696289, "learning_rate": 8.790410958904108e-07, "log_odds_chosen": 0.817115068435669, "log_odds_ratio": -0.49191349744796753, "logits/chosen": 0.2162233591079712, "logits/rejected": 0.24868425726890564, "logps/chosen": -2.27687931060791, "logps/rejected": -3.0403597354888916, "loss": 1.0089, "nll_loss": 0.9596949815750122, "rewards/accuracies": 0.625, "rewards/chosen": -0.22768795490264893, "rewards/margins": 0.07634802907705307, "rewards/rejected": -0.3040359616279602, "step": 883 }, { "epoch": 2.4202600958247773, "grad_norm": 3.800556182861328, "learning_rate": 8.78904109589041e-07, "log_odds_chosen": -0.1135304868221283, "log_odds_ratio": -0.899758517742157, "logits/chosen": 0.3935669958591461, "logits/rejected": 0.4406297206878662, "logps/chosen": -2.88138484954834, "logps/rejected": -2.7239890098571777, "loss": 0.9807, "nll_loss": 0.8907467126846313, "rewards/accuracies": 0.625, "rewards/chosen": -0.2881385087966919, "rewards/margins": -0.015739597380161285, "rewards/rejected": -0.2723989188671112, "step": 884 }, { "epoch": 2.42299794661191, "grad_norm": 3.0641002655029297, "learning_rate": 8.787671232876713e-07, "log_odds_chosen": -0.036739856004714966, "log_odds_ratio": -0.7827287912368774, "logits/chosen": -0.011619608849287033, "logits/rejected": -0.04690353572368622, "logps/chosen": -2.3146793842315674, "logps/rejected": -2.2369372844696045, "loss": 1.0852, "nll_loss": 1.006919264793396, "rewards/accuracies": 0.5, "rewards/chosen": -0.23146796226501465, "rewards/margins": -0.00777423195540905, "rewards/rejected": -0.22369372844696045, "step": 885 }, { "epoch": 2.425735797399042, "grad_norm": 3.4636733531951904, "learning_rate": 8.786301369863012e-07, "log_odds_chosen": 0.5053487420082092, "log_odds_ratio": -0.7541667819023132, "logits/chosen": 0.3453355133533478, "logits/rejected": 0.3730432987213135, "logps/chosen": -2.4591007232666016, "logps/rejected": -2.872710943222046, "loss": 1.0305, "nll_loss": 0.9551123380661011, "rewards/accuracies": 0.75, "rewards/chosen": -0.24591007828712463, "rewards/margins": 0.041361015290021896, "rewards/rejected": -0.287271112203598, "step": 886 }, { "epoch": 2.428473648186174, "grad_norm": 2.9016048908233643, "learning_rate": 8.784931506849315e-07, "log_odds_chosen": 0.5494920611381531, "log_odds_ratio": -0.584747314453125, "logits/chosen": 0.17773908376693726, "logits/rejected": 0.2437954694032669, "logps/chosen": -2.1779367923736572, "logps/rejected": -2.71036434173584, "loss": 0.9715, "nll_loss": 0.9129827618598938, "rewards/accuracies": 0.75, "rewards/chosen": -0.21779367327690125, "rewards/margins": 0.053242746740579605, "rewards/rejected": -0.27103641629219055, "step": 887 }, { "epoch": 2.431211498973306, "grad_norm": 3.0706655979156494, "learning_rate": 8.783561643835617e-07, "log_odds_chosen": 0.7054116129875183, "log_odds_ratio": -0.571558952331543, "logits/chosen": 0.3100038170814514, "logits/rejected": 0.3057713508605957, "logps/chosen": -2.3981659412384033, "logps/rejected": -3.01709246635437, "loss": 1.0597, "nll_loss": 1.00254225730896, "rewards/accuracies": 0.625, "rewards/chosen": -0.2398165911436081, "rewards/margins": 0.06189265474677086, "rewards/rejected": -0.30170923471450806, "step": 888 }, { "epoch": 2.433949349760438, "grad_norm": 2.821359634399414, "learning_rate": 8.782191780821917e-07, "log_odds_chosen": 0.4908294677734375, "log_odds_ratio": -0.5715480446815491, "logits/chosen": 0.06814772635698318, "logits/rejected": 0.07096708565950394, "logps/chosen": -1.8416447639465332, "logps/rejected": -2.2756028175354004, "loss": 1.0119, "nll_loss": 0.9547206163406372, "rewards/accuracies": 0.625, "rewards/chosen": -0.18416447937488556, "rewards/margins": 0.04339579865336418, "rewards/rejected": -0.22756026685237885, "step": 889 }, { "epoch": 2.43668720054757, "grad_norm": 3.139909267425537, "learning_rate": 8.780821917808219e-07, "log_odds_chosen": 0.012919291853904724, "log_odds_ratio": -0.7820017337799072, "logits/chosen": 0.11085395514965057, "logits/rejected": 0.11044100672006607, "logps/chosen": -2.2581725120544434, "logps/rejected": -2.229668140411377, "loss": 0.9994, "nll_loss": 0.9211595058441162, "rewards/accuracies": 0.75, "rewards/chosen": -0.2258172482252121, "rewards/margins": -0.0028504226356744766, "rewards/rejected": -0.22296683490276337, "step": 890 }, { "epoch": 2.439425051334702, "grad_norm": 3.587934732437134, "learning_rate": 8.779452054794521e-07, "log_odds_chosen": 0.22936676442623138, "log_odds_ratio": -0.8108248710632324, "logits/chosen": 0.35189545154571533, "logits/rejected": 0.29033997654914856, "logps/chosen": -2.6378965377807617, "logps/rejected": -2.7864904403686523, "loss": 1.1451, "nll_loss": 1.0639967918395996, "rewards/accuracies": 0.625, "rewards/chosen": -0.26378965377807617, "rewards/margins": 0.014859391376376152, "rewards/rejected": -0.2786490321159363, "step": 891 }, { "epoch": 2.4421629021218343, "grad_norm": 2.846302032470703, "learning_rate": 8.778082191780822e-07, "log_odds_chosen": 1.4298959970474243, "log_odds_ratio": -0.43334096670150757, "logits/chosen": 0.28663408756256104, "logits/rejected": 0.29814955592155457, "logps/chosen": -1.968095302581787, "logps/rejected": -3.2943239212036133, "loss": 0.9199, "nll_loss": 0.8765923380851746, "rewards/accuracies": 0.75, "rewards/chosen": -0.1968095302581787, "rewards/margins": 0.1326228678226471, "rewards/rejected": -0.3294323682785034, "step": 892 }, { "epoch": 2.4449007529089664, "grad_norm": 3.204223394393921, "learning_rate": 8.776712328767123e-07, "log_odds_chosen": 0.38168200850486755, "log_odds_ratio": -0.5357335805892944, "logits/chosen": 0.002615557052195072, "logits/rejected": -0.05271953344345093, "logps/chosen": -2.4654619693756104, "logps/rejected": -2.806565284729004, "loss": 1.0754, "nll_loss": 1.0218724012374878, "rewards/accuracies": 0.875, "rewards/chosen": -0.24654620885849, "rewards/margins": 0.03411031514406204, "rewards/rejected": -0.28065651655197144, "step": 893 }, { "epoch": 2.4476386036960984, "grad_norm": 4.481161594390869, "learning_rate": 8.775342465753425e-07, "log_odds_chosen": 0.4533580243587494, "log_odds_ratio": -0.6859338879585266, "logits/chosen": 0.2975575923919678, "logits/rejected": 0.27070993185043335, "logps/chosen": -2.4979050159454346, "logps/rejected": -2.905843734741211, "loss": 1.0156, "nll_loss": 0.9469678401947021, "rewards/accuracies": 0.625, "rewards/chosen": -0.2497905194759369, "rewards/margins": 0.04079388082027435, "rewards/rejected": -0.29058441519737244, "step": 894 }, { "epoch": 2.4503764544832305, "grad_norm": 2.795316457748413, "learning_rate": 8.773972602739726e-07, "log_odds_chosen": 1.0681805610656738, "log_odds_ratio": -0.5036909580230713, "logits/chosen": 0.2833278477191925, "logits/rejected": 0.2225957065820694, "logps/chosen": -1.7751824855804443, "logps/rejected": -2.7123301029205322, "loss": 1.016, "nll_loss": 0.9656451940536499, "rewards/accuracies": 0.75, "rewards/chosen": -0.17751824855804443, "rewards/margins": 0.09371475875377655, "rewards/rejected": -0.2712329924106598, "step": 895 }, { "epoch": 2.453114305270363, "grad_norm": 3.310290813446045, "learning_rate": 8.772602739726027e-07, "log_odds_chosen": 0.09795546531677246, "log_odds_ratio": -0.7584430575370789, "logits/chosen": 0.38063719868659973, "logits/rejected": 0.37178245186805725, "logps/chosen": -2.6615214347839355, "logps/rejected": -2.703019380569458, "loss": 0.9481, "nll_loss": 0.87229323387146, "rewards/accuracies": 0.625, "rewards/chosen": -0.26615214347839355, "rewards/margins": 0.004149818792939186, "rewards/rejected": -0.2703019380569458, "step": 896 }, { "epoch": 2.455852156057495, "grad_norm": 3.0308585166931152, "learning_rate": 8.771232876712329e-07, "log_odds_chosen": -0.43091586232185364, "log_odds_ratio": -1.1468349695205688, "logits/chosen": 0.12345325201749802, "logits/rejected": 0.08842942863702774, "logps/chosen": -3.339967966079712, "logps/rejected": -2.91617488861084, "loss": 1.0795, "nll_loss": 0.9648277759552002, "rewards/accuracies": 0.625, "rewards/chosen": -0.33399680256843567, "rewards/margins": -0.04237932711839676, "rewards/rejected": -0.2916174530982971, "step": 897 }, { "epoch": 2.458590006844627, "grad_norm": 3.180015802383423, "learning_rate": 8.76986301369863e-07, "log_odds_chosen": 0.03081221878528595, "log_odds_ratio": -0.8255515098571777, "logits/chosen": 0.40297168493270874, "logits/rejected": 0.4371318519115448, "logps/chosen": -2.6077206134796143, "logps/rejected": -2.5775089263916016, "loss": 0.9191, "nll_loss": 0.8365948796272278, "rewards/accuracies": 0.625, "rewards/chosen": -0.26077204942703247, "rewards/margins": -0.0030211666598916054, "rewards/rejected": -0.25775089859962463, "step": 898 }, { "epoch": 2.461327857631759, "grad_norm": 3.0250425338745117, "learning_rate": 8.768493150684932e-07, "log_odds_chosen": 0.018666990101337433, "log_odds_ratio": -0.7989557981491089, "logits/chosen": 0.06369772553443909, "logits/rejected": 0.03804030641913414, "logps/chosen": -2.284358501434326, "logps/rejected": -2.242353677749634, "loss": 1.0692, "nll_loss": 0.9893163442611694, "rewards/accuracies": 0.625, "rewards/chosen": -0.22843587398529053, "rewards/margins": -0.00420047901570797, "rewards/rejected": -0.2242353856563568, "step": 899 }, { "epoch": 2.4640657084188913, "grad_norm": 3.3009908199310303, "learning_rate": 8.767123287671232e-07, "log_odds_chosen": 0.28780514001846313, "log_odds_ratio": -0.6476203203201294, "logits/chosen": 0.16473709046840668, "logits/rejected": 0.13743196427822113, "logps/chosen": -2.58333158493042, "logps/rejected": -2.851227283477783, "loss": 1.0605, "nll_loss": 0.9957461953163147, "rewards/accuracies": 0.75, "rewards/chosen": -0.25833314657211304, "rewards/margins": 0.026789572089910507, "rewards/rejected": -0.28512272238731384, "step": 900 }, { "epoch": 2.4668035592060233, "grad_norm": 2.534126043319702, "learning_rate": 8.765753424657534e-07, "log_odds_chosen": 1.1663384437561035, "log_odds_ratio": -0.4416677951812744, "logits/chosen": 0.18503601849079132, "logits/rejected": 0.06125534325838089, "logps/chosen": -1.833549976348877, "logps/rejected": -2.849583625793457, "loss": 0.9619, "nll_loss": 0.9176941514015198, "rewards/accuracies": 0.75, "rewards/chosen": -0.18335500359535217, "rewards/margins": 0.10160335898399353, "rewards/rejected": -0.2849583625793457, "step": 901 }, { "epoch": 2.4695414099931554, "grad_norm": 2.8433663845062256, "learning_rate": 8.764383561643836e-07, "log_odds_chosen": 0.3242225646972656, "log_odds_ratio": -0.638077437877655, "logits/chosen": 0.23053346574306488, "logits/rejected": 0.22298520803451538, "logps/chosen": -2.1880037784576416, "logps/rejected": -2.4454619884490967, "loss": 1.0523, "nll_loss": 0.9884788393974304, "rewards/accuracies": 0.875, "rewards/chosen": -0.2188004106283188, "rewards/margins": 0.025745807215571404, "rewards/rejected": -0.24454618990421295, "step": 902 }, { "epoch": 2.4722792607802875, "grad_norm": 3.4198923110961914, "learning_rate": 8.763013698630136e-07, "log_odds_chosen": -0.3329714238643646, "log_odds_ratio": -1.11287522315979, "logits/chosen": 0.33419710397720337, "logits/rejected": 0.2950914204120636, "logps/chosen": -3.2308549880981445, "logps/rejected": -2.9102749824523926, "loss": 0.9965, "nll_loss": 0.8852131366729736, "rewards/accuracies": 0.5, "rewards/chosen": -0.3230854868888855, "rewards/margins": -0.032057978212833405, "rewards/rejected": -0.2910275161266327, "step": 903 }, { "epoch": 2.4750171115674195, "grad_norm": 4.010914325714111, "learning_rate": 8.761643835616438e-07, "log_odds_chosen": -0.7687521576881409, "log_odds_ratio": -1.3742883205413818, "logits/chosen": 0.17553633451461792, "logits/rejected": 0.2618923783302307, "logps/chosen": -3.5234179496765137, "logps/rejected": -2.7738351821899414, "loss": 1.0552, "nll_loss": 0.9177703261375427, "rewards/accuracies": 0.375, "rewards/chosen": -0.35234177112579346, "rewards/margins": -0.07495826482772827, "rewards/rejected": -0.2773835062980652, "step": 904 }, { "epoch": 2.4777549623545516, "grad_norm": 2.685502052307129, "learning_rate": 8.76027397260274e-07, "log_odds_chosen": 0.5902636051177979, "log_odds_ratio": -0.4941328167915344, "logits/chosen": 0.4276615381240845, "logits/rejected": 0.42900699377059937, "logps/chosen": -1.9003512859344482, "logps/rejected": -2.408285140991211, "loss": 1.0024, "nll_loss": 0.9530156850814819, "rewards/accuracies": 0.875, "rewards/chosen": -0.1900351345539093, "rewards/margins": 0.050793372094631195, "rewards/rejected": -0.2408285140991211, "step": 905 }, { "epoch": 2.4804928131416837, "grad_norm": 2.6054229736328125, "learning_rate": 8.758904109589041e-07, "log_odds_chosen": 0.4819086194038391, "log_odds_ratio": -0.5292410850524902, "logits/chosen": 0.08456484973430634, "logits/rejected": 0.027094408869743347, "logps/chosen": -1.8636125326156616, "logps/rejected": -2.2808027267456055, "loss": 1.0716, "nll_loss": 1.0186944007873535, "rewards/accuracies": 0.75, "rewards/chosen": -0.18636125326156616, "rewards/margins": 0.041718997061252594, "rewards/rejected": -0.22808027267456055, "step": 906 }, { "epoch": 2.4832306639288158, "grad_norm": 3.173435688018799, "learning_rate": 8.757534246575342e-07, "log_odds_chosen": 1.0444562435150146, "log_odds_ratio": -0.4066619873046875, "logits/chosen": 0.25255581736564636, "logits/rejected": 0.24528296291828156, "logps/chosen": -1.8462743759155273, "logps/rejected": -2.710507869720459, "loss": 0.9243, "nll_loss": 0.883651852607727, "rewards/accuracies": 0.875, "rewards/chosen": -0.1846274435520172, "rewards/margins": 0.0864233449101448, "rewards/rejected": -0.2710508108139038, "step": 907 }, { "epoch": 2.485968514715948, "grad_norm": 5.137086868286133, "learning_rate": 8.756164383561644e-07, "log_odds_chosen": 0.4548485279083252, "log_odds_ratio": -0.5897088646888733, "logits/chosen": 0.4065287709236145, "logits/rejected": 0.3870947062969208, "logps/chosen": -2.4189846515655518, "logps/rejected": -2.7729365825653076, "loss": 0.9053, "nll_loss": 0.8462832570075989, "rewards/accuracies": 0.875, "rewards/chosen": -0.24189847707748413, "rewards/margins": 0.035395193845033646, "rewards/rejected": -0.27729368209838867, "step": 908 }, { "epoch": 2.48870636550308, "grad_norm": 2.7672996520996094, "learning_rate": 8.754794520547945e-07, "log_odds_chosen": 0.4338010847568512, "log_odds_ratio": -0.542140543460846, "logits/chosen": 0.1376713216304779, "logits/rejected": 0.13392804563045502, "logps/chosen": -2.2753396034240723, "logps/rejected": -2.6626529693603516, "loss": 0.9479, "nll_loss": 0.8937254548072815, "rewards/accuracies": 0.625, "rewards/chosen": -0.2275339663028717, "rewards/margins": 0.03873136639595032, "rewards/rejected": -0.266265332698822, "step": 909 }, { "epoch": 2.491444216290212, "grad_norm": 2.3649954795837402, "learning_rate": 8.753424657534246e-07, "log_odds_chosen": 1.25275719165802, "log_odds_ratio": -0.42112571001052856, "logits/chosen": 0.12226277589797974, "logits/rejected": 0.012120798230171204, "logps/chosen": -1.824967384338379, "logps/rejected": -2.935702323913574, "loss": 1.063, "nll_loss": 1.0209112167358398, "rewards/accuracies": 0.75, "rewards/chosen": -0.18249675631523132, "rewards/margins": 0.11107350140810013, "rewards/rejected": -0.29357025027275085, "step": 910 }, { "epoch": 2.4941820670773445, "grad_norm": 2.782259941101074, "learning_rate": 8.752054794520548e-07, "log_odds_chosen": 0.9607036113739014, "log_odds_ratio": -0.4545895457267761, "logits/chosen": 0.08480849862098694, "logits/rejected": 0.03951452672481537, "logps/chosen": -2.6133556365966797, "logps/rejected": -3.4690003395080566, "loss": 1.0095, "nll_loss": 0.9640569090843201, "rewards/accuracies": 0.875, "rewards/chosen": -0.261335551738739, "rewards/margins": 0.08556444197893143, "rewards/rejected": -0.34690001606941223, "step": 911 }, { "epoch": 2.4969199178644765, "grad_norm": 2.563159465789795, "learning_rate": 8.750684931506849e-07, "log_odds_chosen": 1.4885351657867432, "log_odds_ratio": -0.395485520362854, "logits/chosen": 0.2989283800125122, "logits/rejected": 0.23270705342292786, "logps/chosen": -1.7539379596710205, "logps/rejected": -3.0843591690063477, "loss": 1.0827, "nll_loss": 1.0431458950042725, "rewards/accuracies": 0.875, "rewards/chosen": -0.17539380490779877, "rewards/margins": 0.1330420970916748, "rewards/rejected": -0.30843591690063477, "step": 912 }, { "epoch": 2.4996577686516086, "grad_norm": 3.1448051929473877, "learning_rate": 8.749315068493151e-07, "log_odds_chosen": 0.5518778562545776, "log_odds_ratio": -0.5878446102142334, "logits/chosen": 0.38141536712646484, "logits/rejected": 0.3063100576400757, "logps/chosen": -2.440533399581909, "logps/rejected": -2.9381840229034424, "loss": 0.9901, "nll_loss": 0.9313440322875977, "rewards/accuracies": 0.625, "rewards/chosen": -0.24405333399772644, "rewards/margins": 0.04976506158709526, "rewards/rejected": -0.2938184142112732, "step": 913 }, { "epoch": 2.5023956194387407, "grad_norm": 2.904407262802124, "learning_rate": 8.747945205479451e-07, "log_odds_chosen": 0.27187269926071167, "log_odds_ratio": -0.6524685621261597, "logits/chosen": 0.30182257294654846, "logits/rejected": 0.26101016998291016, "logps/chosen": -2.4941086769104004, "logps/rejected": -2.737037420272827, "loss": 1.0331, "nll_loss": 0.9678748846054077, "rewards/accuracies": 0.625, "rewards/chosen": -0.24941086769104004, "rewards/margins": 0.024292875081300735, "rewards/rejected": -0.27370375394821167, "step": 914 }, { "epoch": 2.5051334702258727, "grad_norm": 2.7594850063323975, "learning_rate": 8.746575342465753e-07, "log_odds_chosen": 0.8431906700134277, "log_odds_ratio": -0.44696080684661865, "logits/chosen": 0.4535066783428192, "logits/rejected": 0.41004055738449097, "logps/chosen": -1.9896670579910278, "logps/rejected": -2.682488203048706, "loss": 0.9126, "nll_loss": 0.8678808808326721, "rewards/accuracies": 0.875, "rewards/chosen": -0.19896671175956726, "rewards/margins": 0.06928210705518723, "rewards/rejected": -0.2682487964630127, "step": 915 }, { "epoch": 2.507871321013005, "grad_norm": 2.7285473346710205, "learning_rate": 8.745205479452055e-07, "log_odds_chosen": 0.3506901264190674, "log_odds_ratio": -0.5672320127487183, "logits/chosen": 0.2289695143699646, "logits/rejected": 0.15791039168834686, "logps/chosen": -1.8064688444137573, "logps/rejected": -2.0608794689178467, "loss": 1.0926, "nll_loss": 1.0358903408050537, "rewards/accuracies": 0.875, "rewards/chosen": -0.1806468963623047, "rewards/margins": 0.025441065430641174, "rewards/rejected": -0.20608796179294586, "step": 916 }, { "epoch": 2.510609171800137, "grad_norm": 3.030698776245117, "learning_rate": 8.743835616438355e-07, "log_odds_chosen": 0.48617130517959595, "log_odds_ratio": -0.5417941212654114, "logits/chosen": 0.34755420684814453, "logits/rejected": 0.3049126863479614, "logps/chosen": -1.992368221282959, "logps/rejected": -2.3652536869049072, "loss": 1.0568, "nll_loss": 1.0026109218597412, "rewards/accuracies": 0.875, "rewards/chosen": -0.19923682510852814, "rewards/margins": 0.037288546562194824, "rewards/rejected": -0.23652538657188416, "step": 917 }, { "epoch": 2.513347022587269, "grad_norm": 3.861933946609497, "learning_rate": 8.742465753424657e-07, "log_odds_chosen": -0.3874928057193756, "log_odds_ratio": -0.936126708984375, "logits/chosen": 0.46099740266799927, "logits/rejected": 0.4721560478210449, "logps/chosen": -3.2279298305511475, "logps/rejected": -2.859926700592041, "loss": 1.1597, "nll_loss": 1.0661371946334839, "rewards/accuracies": 0.25, "rewards/chosen": -0.32279297709465027, "rewards/margins": -0.036800287663936615, "rewards/rejected": -0.28599268198013306, "step": 918 }, { "epoch": 2.516084873374401, "grad_norm": 3.320347309112549, "learning_rate": 8.741095890410959e-07, "log_odds_chosen": 0.07214593887329102, "log_odds_ratio": -0.7620428800582886, "logits/chosen": 0.4330560266971588, "logits/rejected": 0.4010714888572693, "logps/chosen": -2.7072112560272217, "logps/rejected": -2.7343831062316895, "loss": 0.9923, "nll_loss": 0.9161257743835449, "rewards/accuracies": 0.5, "rewards/chosen": -0.2707211673259735, "rewards/margins": 0.0027171913534402847, "rewards/rejected": -0.27343833446502686, "step": 919 }, { "epoch": 2.518822724161533, "grad_norm": 2.605154037475586, "learning_rate": 8.73972602739726e-07, "log_odds_chosen": 0.8852587938308716, "log_odds_ratio": -0.4027601182460785, "logits/chosen": 0.23740166425704956, "logits/rejected": 0.23977653682231903, "logps/chosen": -1.6812576055526733, "logps/rejected": -2.4147894382476807, "loss": 1.0028, "nll_loss": 0.9625720381736755, "rewards/accuracies": 0.875, "rewards/chosen": -0.16812576353549957, "rewards/margins": 0.07335316389799118, "rewards/rejected": -0.24147891998291016, "step": 920 }, { "epoch": 2.521560574948665, "grad_norm": 3.3570923805236816, "learning_rate": 8.738356164383561e-07, "log_odds_chosen": 0.3424714207649231, "log_odds_ratio": -0.5852946043014526, "logits/chosen": 0.27813342213630676, "logits/rejected": 0.27362096309661865, "logps/chosen": -2.8794562816619873, "logps/rejected": -3.1817007064819336, "loss": 1.0239, "nll_loss": 0.9654111862182617, "rewards/accuracies": 0.625, "rewards/chosen": -0.28794562816619873, "rewards/margins": 0.030224449932575226, "rewards/rejected": -0.31817007064819336, "step": 921 }, { "epoch": 2.5242984257357977, "grad_norm": 2.4530014991760254, "learning_rate": 8.736986301369863e-07, "log_odds_chosen": 0.2822539508342743, "log_odds_ratio": -0.7762398719787598, "logits/chosen": 0.24981622397899628, "logits/rejected": 0.22825205326080322, "logps/chosen": -1.809248685836792, "logps/rejected": -2.0045461654663086, "loss": 1.0263, "nll_loss": 0.9486908316612244, "rewards/accuracies": 0.625, "rewards/chosen": -0.18092486262321472, "rewards/margins": 0.019529735669493675, "rewards/rejected": -0.20045460760593414, "step": 922 }, { "epoch": 2.5270362765229297, "grad_norm": 3.1453802585601807, "learning_rate": 8.735616438356164e-07, "log_odds_chosen": 0.28105223178863525, "log_odds_ratio": -0.7686914801597595, "logits/chosen": 0.20098429918289185, "logits/rejected": 0.22038640081882477, "logps/chosen": -2.9514999389648438, "logps/rejected": -3.1844725608825684, "loss": 1.0641, "nll_loss": 0.9872711300849915, "rewards/accuracies": 0.625, "rewards/chosen": -0.2951500415802002, "rewards/margins": 0.023297246545553207, "rewards/rejected": -0.3184472322463989, "step": 923 }, { "epoch": 2.529774127310062, "grad_norm": 2.8554182052612305, "learning_rate": 8.734246575342465e-07, "log_odds_chosen": 0.38192200660705566, "log_odds_ratio": -0.593962550163269, "logits/chosen": 0.27461615204811096, "logits/rejected": 0.2859172821044922, "logps/chosen": -2.3966214656829834, "logps/rejected": -2.707507610321045, "loss": 1.1336, "nll_loss": 1.0742372274398804, "rewards/accuracies": 0.75, "rewards/chosen": -0.23966214060783386, "rewards/margins": 0.031088627874851227, "rewards/rejected": -0.2707507610321045, "step": 924 }, { "epoch": 2.532511978097194, "grad_norm": 3.006303310394287, "learning_rate": 8.732876712328767e-07, "log_odds_chosen": 1.1430087089538574, "log_odds_ratio": -0.5440545678138733, "logits/chosen": 0.3688655197620392, "logits/rejected": 0.36240074038505554, "logps/chosen": -1.8123494386672974, "logps/rejected": -2.7209219932556152, "loss": 1.0311, "nll_loss": 0.9766884446144104, "rewards/accuracies": 0.75, "rewards/chosen": -0.1812349408864975, "rewards/margins": 0.09085724502801895, "rewards/rejected": -0.27209219336509705, "step": 925 }, { "epoch": 2.535249828884326, "grad_norm": 3.073512554168701, "learning_rate": 8.731506849315068e-07, "log_odds_chosen": 0.06787383556365967, "log_odds_ratio": -0.9257254004478455, "logits/chosen": 0.4264705181121826, "logits/rejected": 0.3969074785709381, "logps/chosen": -2.780346632003784, "logps/rejected": -2.769684314727783, "loss": 0.9792, "nll_loss": 0.8866305351257324, "rewards/accuracies": 0.75, "rewards/chosen": -0.27803465723991394, "rewards/margins": -0.0010662302374839783, "rewards/rejected": -0.27696841955184937, "step": 926 }, { "epoch": 2.537987679671458, "grad_norm": 3.2582497596740723, "learning_rate": 8.73013698630137e-07, "log_odds_chosen": -0.06581853330135345, "log_odds_ratio": -0.7457014322280884, "logits/chosen": 0.19055795669555664, "logits/rejected": 0.1934259831905365, "logps/chosen": -2.403770685195923, "logps/rejected": -2.32995343208313, "loss": 0.9747, "nll_loss": 0.9001292586326599, "rewards/accuracies": 0.625, "rewards/chosen": -0.24037706851959229, "rewards/margins": -0.007381711155176163, "rewards/rejected": -0.23299536108970642, "step": 927 }, { "epoch": 2.54072553045859, "grad_norm": 2.6816916465759277, "learning_rate": 8.72876712328767e-07, "log_odds_chosen": 0.4093676209449768, "log_odds_ratio": -0.5971643924713135, "logits/chosen": 0.24682047963142395, "logits/rejected": 0.19679708778858185, "logps/chosen": -1.9598890542984009, "logps/rejected": -2.305382490158081, "loss": 1.039, "nll_loss": 0.9793046712875366, "rewards/accuracies": 0.625, "rewards/chosen": -0.19598890841007233, "rewards/margins": 0.03454934433102608, "rewards/rejected": -0.2305382639169693, "step": 928 }, { "epoch": 2.543463381245722, "grad_norm": 2.923109531402588, "learning_rate": 8.727397260273972e-07, "log_odds_chosen": 0.08872969448566437, "log_odds_ratio": -0.7651505470275879, "logits/chosen": 0.12998878955841064, "logits/rejected": 0.04388833045959473, "logps/chosen": -2.2696609497070312, "logps/rejected": -2.308798313140869, "loss": 1.0388, "nll_loss": 0.9622466564178467, "rewards/accuracies": 0.625, "rewards/chosen": -0.22696608304977417, "rewards/margins": 0.003913749009370804, "rewards/rejected": -0.23087984323501587, "step": 929 }, { "epoch": 2.546201232032854, "grad_norm": 2.707064628601074, "learning_rate": 8.726027397260274e-07, "log_odds_chosen": 0.4086059629917145, "log_odds_ratio": -0.6918254494667053, "logits/chosen": 0.36228176951408386, "logits/rejected": 0.33068016171455383, "logps/chosen": -2.2790331840515137, "logps/rejected": -2.601738452911377, "loss": 1.095, "nll_loss": 1.0258113145828247, "rewards/accuracies": 0.875, "rewards/chosen": -0.2279033064842224, "rewards/margins": 0.03227053955197334, "rewards/rejected": -0.26017382740974426, "step": 930 }, { "epoch": 2.5489390828199863, "grad_norm": 2.7938621044158936, "learning_rate": 8.724657534246574e-07, "log_odds_chosen": 0.5910235643386841, "log_odds_ratio": -0.5215591192245483, "logits/chosen": 0.22860130667686462, "logits/rejected": 0.18923404812812805, "logps/chosen": -2.162674903869629, "logps/rejected": -2.670670986175537, "loss": 1.0195, "nll_loss": 0.9673753976821899, "rewards/accuracies": 0.75, "rewards/chosen": -0.21626751124858856, "rewards/margins": 0.05079960078001022, "rewards/rejected": -0.2670671343803406, "step": 931 }, { "epoch": 2.5516769336071183, "grad_norm": 2.588656187057495, "learning_rate": 8.723287671232876e-07, "log_odds_chosen": 0.48375043272972107, "log_odds_ratio": -0.559234619140625, "logits/chosen": 0.24122723937034607, "logits/rejected": 0.15222005546092987, "logps/chosen": -1.9773869514465332, "logps/rejected": -2.4077861309051514, "loss": 1.0809, "nll_loss": 1.0249990224838257, "rewards/accuracies": 0.75, "rewards/chosen": -0.19773870706558228, "rewards/margins": 0.04303991049528122, "rewards/rejected": -0.2407786250114441, "step": 932 }, { "epoch": 2.5544147843942504, "grad_norm": 2.5538384914398193, "learning_rate": 8.721917808219178e-07, "log_odds_chosen": 0.6813555955886841, "log_odds_ratio": -0.5848603248596191, "logits/chosen": 0.37132760882377625, "logits/rejected": 0.3648492693901062, "logps/chosen": -2.319920539855957, "logps/rejected": -2.9470596313476562, "loss": 0.9295, "nll_loss": 0.8710352182388306, "rewards/accuracies": 0.75, "rewards/chosen": -0.23199203610420227, "rewards/margins": 0.06271393597126007, "rewards/rejected": -0.29470598697662354, "step": 933 }, { "epoch": 2.5571526351813825, "grad_norm": 2.4638071060180664, "learning_rate": 8.720547945205479e-07, "log_odds_chosen": 0.23320746421813965, "log_odds_ratio": -0.7005106806755066, "logits/chosen": 0.1507750153541565, "logits/rejected": 0.1263270229101181, "logps/chosen": -1.981542706489563, "logps/rejected": -2.1536569595336914, "loss": 0.9703, "nll_loss": 0.9002343416213989, "rewards/accuracies": 0.625, "rewards/chosen": -0.1981542706489563, "rewards/margins": 0.0172114297747612, "rewards/rejected": -0.2153657078742981, "step": 934 }, { "epoch": 2.5598904859685145, "grad_norm": 2.851339340209961, "learning_rate": 8.71917808219178e-07, "log_odds_chosen": 0.9423494935035706, "log_odds_ratio": -0.4302680492401123, "logits/chosen": 0.3287178575992584, "logits/rejected": 0.3301212787628174, "logps/chosen": -2.3286588191986084, "logps/rejected": -3.176208019256592, "loss": 0.9566, "nll_loss": 0.9135676622390747, "rewards/accuracies": 0.875, "rewards/chosen": -0.23286586999893188, "rewards/margins": 0.08475493639707565, "rewards/rejected": -0.31762081384658813, "step": 935 }, { "epoch": 2.5626283367556466, "grad_norm": 3.438875675201416, "learning_rate": 8.717808219178082e-07, "log_odds_chosen": 0.25189417600631714, "log_odds_ratio": -0.6750288009643555, "logits/chosen": 0.435164213180542, "logits/rejected": 0.4390060603618622, "logps/chosen": -2.4375054836273193, "logps/rejected": -2.615868091583252, "loss": 0.9036, "nll_loss": 0.8361335396766663, "rewards/accuracies": 0.75, "rewards/chosen": -0.24375055730342865, "rewards/margins": 0.017836278304457664, "rewards/rejected": -0.26158684492111206, "step": 936 }, { "epoch": 2.5653661875427787, "grad_norm": 2.583688259124756, "learning_rate": 8.716438356164383e-07, "log_odds_chosen": 0.014453299343585968, "log_odds_ratio": -0.7670565247535706, "logits/chosen": 0.18466147780418396, "logits/rejected": 0.19168633222579956, "logps/chosen": -2.126121759414673, "logps/rejected": -2.120051622390747, "loss": 1.064, "nll_loss": 0.9872583150863647, "rewards/accuracies": 0.375, "rewards/chosen": -0.21261218190193176, "rewards/margins": -0.0006070071831345558, "rewards/rejected": -0.21200516819953918, "step": 937 }, { "epoch": 2.5681040383299107, "grad_norm": 2.414900064468384, "learning_rate": 8.715068493150684e-07, "log_odds_chosen": 0.5315274000167847, "log_odds_ratio": -0.5147731304168701, "logits/chosen": 0.2780575454235077, "logits/rejected": 0.25923973321914673, "logps/chosen": -1.9613580703735352, "logps/rejected": -2.453947067260742, "loss": 1.002, "nll_loss": 0.9505568146705627, "rewards/accuracies": 0.75, "rewards/chosen": -0.19613581895828247, "rewards/margins": 0.04925891011953354, "rewards/rejected": -0.2453947216272354, "step": 938 }, { "epoch": 2.5708418891170433, "grad_norm": 3.163588285446167, "learning_rate": 8.713698630136986e-07, "log_odds_chosen": 0.4514991343021393, "log_odds_ratio": -0.6094340682029724, "logits/chosen": 0.3912409543991089, "logits/rejected": 0.38448020815849304, "logps/chosen": -2.555450439453125, "logps/rejected": -2.9704480171203613, "loss": 1.02, "nll_loss": 0.9590868353843689, "rewards/accuracies": 0.75, "rewards/chosen": -0.255545049905777, "rewards/margins": 0.04149976000189781, "rewards/rejected": -0.2970448136329651, "step": 939 }, { "epoch": 2.5735797399041753, "grad_norm": 2.489394426345825, "learning_rate": 8.712328767123287e-07, "log_odds_chosen": 0.5168628096580505, "log_odds_ratio": -0.5419597625732422, "logits/chosen": 0.3597323000431061, "logits/rejected": 0.3185461163520813, "logps/chosen": -2.024106979370117, "logps/rejected": -2.4752371311187744, "loss": 1.0617, "nll_loss": 1.00753653049469, "rewards/accuracies": 0.75, "rewards/chosen": -0.20241069793701172, "rewards/margins": 0.045113012194633484, "rewards/rejected": -0.2475237101316452, "step": 940 }, { "epoch": 2.5763175906913074, "grad_norm": 2.8197624683380127, "learning_rate": 8.710958904109589e-07, "log_odds_chosen": -0.04753252863883972, "log_odds_ratio": -0.8909788131713867, "logits/chosen": 0.3349749743938446, "logits/rejected": 0.33328819274902344, "logps/chosen": -2.610044479370117, "logps/rejected": -2.526864767074585, "loss": 1.1209, "nll_loss": 1.0317671298980713, "rewards/accuracies": 0.5, "rewards/chosen": -0.2610044479370117, "rewards/margins": -0.008317975327372551, "rewards/rejected": -0.2526865005493164, "step": 941 }, { "epoch": 2.5790554414784395, "grad_norm": 2.4469337463378906, "learning_rate": 8.70958904109589e-07, "log_odds_chosen": 1.5712854862213135, "log_odds_ratio": -0.4238754212856293, "logits/chosen": 0.17641016840934753, "logits/rejected": 0.060955408960580826, "logps/chosen": -1.7746844291687012, "logps/rejected": -3.2175674438476562, "loss": 1.0212, "nll_loss": 0.9788419008255005, "rewards/accuracies": 0.75, "rewards/chosen": -0.1774684190750122, "rewards/margins": 0.1442883163690567, "rewards/rejected": -0.3217567503452301, "step": 942 }, { "epoch": 2.5817932922655715, "grad_norm": 2.5087642669677734, "learning_rate": 8.708219178082191e-07, "log_odds_chosen": 0.5994299650192261, "log_odds_ratio": -0.6538931131362915, "logits/chosen": 0.25018686056137085, "logits/rejected": 0.22968673706054688, "logps/chosen": -2.214066982269287, "logps/rejected": -2.7095773220062256, "loss": 0.9727, "nll_loss": 0.9073562622070312, "rewards/accuracies": 0.625, "rewards/chosen": -0.2214067131280899, "rewards/margins": 0.049551017582416534, "rewards/rejected": -0.27095773816108704, "step": 943 }, { "epoch": 2.5845311430527036, "grad_norm": 2.3996684551239014, "learning_rate": 8.706849315068493e-07, "log_odds_chosen": 1.1119576692581177, "log_odds_ratio": -0.3607373535633087, "logits/chosen": 0.2111324965953827, "logits/rejected": 0.17375868558883667, "logps/chosen": -1.6168899536132812, "logps/rejected": -2.5802547931671143, "loss": 0.9802, "nll_loss": 0.9441579580307007, "rewards/accuracies": 0.875, "rewards/chosen": -0.16168901324272156, "rewards/margins": 0.0963364765048027, "rewards/rejected": -0.25802549719810486, "step": 944 }, { "epoch": 2.5872689938398357, "grad_norm": 2.4019808769226074, "learning_rate": 8.705479452054793e-07, "log_odds_chosen": 0.6272708773612976, "log_odds_ratio": -0.4837714433670044, "logits/chosen": 0.12224046885967255, "logits/rejected": 0.09160566329956055, "logps/chosen": -1.7627606391906738, "logps/rejected": -2.271138906478882, "loss": 0.9856, "nll_loss": 0.9372173547744751, "rewards/accuracies": 0.75, "rewards/chosen": -0.1762760877609253, "rewards/margins": 0.05083782225847244, "rewards/rejected": -0.22711388766765594, "step": 945 }, { "epoch": 2.5900068446269677, "grad_norm": 2.5837581157684326, "learning_rate": 8.704109589041095e-07, "log_odds_chosen": 0.22622022032737732, "log_odds_ratio": -0.6483003497123718, "logits/chosen": 0.3830628991127014, "logits/rejected": 0.3593387007713318, "logps/chosen": -2.1195507049560547, "logps/rejected": -2.298447847366333, "loss": 0.9735, "nll_loss": 0.9087013006210327, "rewards/accuracies": 0.625, "rewards/chosen": -0.21195505559444427, "rewards/margins": 0.017889736220240593, "rewards/rejected": -0.22984479367733002, "step": 946 }, { "epoch": 2.5927446954141, "grad_norm": 2.938074827194214, "learning_rate": 8.702739726027397e-07, "log_odds_chosen": 0.023766234517097473, "log_odds_ratio": -0.8030454516410828, "logits/chosen": 0.21855716407299042, "logits/rejected": 0.17970529198646545, "logps/chosen": -2.2914438247680664, "logps/rejected": -2.2935104370117188, "loss": 1.0533, "nll_loss": 0.9730350971221924, "rewards/accuracies": 0.625, "rewards/chosen": -0.2291443943977356, "rewards/margins": 0.00020663626492023468, "rewards/rejected": -0.22935102880001068, "step": 947 }, { "epoch": 2.5954825462012323, "grad_norm": 4.023411750793457, "learning_rate": 8.701369863013698e-07, "log_odds_chosen": -0.46697187423706055, "log_odds_ratio": -1.2612032890319824, "logits/chosen": 0.45219311118125916, "logits/rejected": 0.44246745109558105, "logps/chosen": -3.1299824714660645, "logps/rejected": -2.589607000350952, "loss": 0.9869, "nll_loss": 0.8607479333877563, "rewards/accuracies": 0.625, "rewards/chosen": -0.3129982352256775, "rewards/margins": -0.05403752624988556, "rewards/rejected": -0.25896069407463074, "step": 948 }, { "epoch": 2.5982203969883644, "grad_norm": 2.9010274410247803, "learning_rate": 8.699999999999999e-07, "log_odds_chosen": 0.24182219803333282, "log_odds_ratio": -0.6617171764373779, "logits/chosen": 0.36830583214759827, "logits/rejected": 0.3882448077201843, "logps/chosen": -2.3597617149353027, "logps/rejected": -2.5641283988952637, "loss": 1.0144, "nll_loss": 0.9482033848762512, "rewards/accuracies": 0.75, "rewards/chosen": -0.2359761744737625, "rewards/margins": 0.02043665014207363, "rewards/rejected": -0.2564128339290619, "step": 949 }, { "epoch": 2.6009582477754964, "grad_norm": 2.5675299167633057, "learning_rate": 8.698630136986301e-07, "log_odds_chosen": 0.802046537399292, "log_odds_ratio": -0.380445659160614, "logits/chosen": 0.3896968960762024, "logits/rejected": 0.3203132748603821, "logps/chosen": -1.5667442083358765, "logps/rejected": -2.2192139625549316, "loss": 1.0474, "nll_loss": 1.0093193054199219, "rewards/accuracies": 1.0, "rewards/chosen": -0.15667441487312317, "rewards/margins": 0.06524698436260223, "rewards/rejected": -0.2219214141368866, "step": 950 }, { "epoch": 2.6036960985626285, "grad_norm": 3.1605911254882812, "learning_rate": 8.697260273972602e-07, "log_odds_chosen": -0.12489458918571472, "log_odds_ratio": -0.841636061668396, "logits/chosen": 0.2766004204750061, "logits/rejected": 0.2543766498565674, "logps/chosen": -2.816150426864624, "logps/rejected": -2.7045562267303467, "loss": 1.0812, "nll_loss": 0.9970347881317139, "rewards/accuracies": 0.375, "rewards/chosen": -0.2816150486469269, "rewards/margins": -0.011159425601363182, "rewards/rejected": -0.27045562863349915, "step": 951 }, { "epoch": 2.6064339493497606, "grad_norm": 2.7655937671661377, "learning_rate": 8.695890410958903e-07, "log_odds_chosen": 0.5117419958114624, "log_odds_ratio": -0.6097086071968079, "logits/chosen": 0.2137949913740158, "logits/rejected": 0.1311565488576889, "logps/chosen": -1.8778417110443115, "logps/rejected": -2.336806058883667, "loss": 0.995, "nll_loss": 0.9340767860412598, "rewards/accuracies": 0.75, "rewards/chosen": -0.18778416514396667, "rewards/margins": 0.04589642584323883, "rewards/rejected": -0.2336806058883667, "step": 952 }, { "epoch": 2.6091718001368926, "grad_norm": 3.0593934059143066, "learning_rate": 8.694520547945205e-07, "log_odds_chosen": 0.5026190280914307, "log_odds_ratio": -0.716968297958374, "logits/chosen": 0.3345933258533478, "logits/rejected": 0.31037917733192444, "logps/chosen": -2.353423595428467, "logps/rejected": -2.743032455444336, "loss": 1.1452, "nll_loss": 1.0735490322113037, "rewards/accuracies": 0.625, "rewards/chosen": -0.2353423833847046, "rewards/margins": 0.038960836827754974, "rewards/rejected": -0.27430322766304016, "step": 953 }, { "epoch": 2.6119096509240247, "grad_norm": 2.3851706981658936, "learning_rate": 8.693150684931506e-07, "log_odds_chosen": 0.4556899070739746, "log_odds_ratio": -0.5262659788131714, "logits/chosen": 0.2989601790904999, "logits/rejected": 0.2644478380680084, "logps/chosen": -2.007075786590576, "logps/rejected": -2.3889167308807373, "loss": 1.0414, "nll_loss": 0.9887546896934509, "rewards/accuracies": 0.875, "rewards/chosen": -0.2007075697183609, "rewards/margins": 0.03818412125110626, "rewards/rejected": -0.23889169096946716, "step": 954 }, { "epoch": 2.614647501711157, "grad_norm": 3.0240936279296875, "learning_rate": 8.691780821917808e-07, "log_odds_chosen": 0.7087539434432983, "log_odds_ratio": -0.48996490240097046, "logits/chosen": 0.5646387338638306, "logits/rejected": 0.547718346118927, "logps/chosen": -2.4011685848236084, "logps/rejected": -3.0215790271759033, "loss": 1.0012, "nll_loss": 0.9521923065185547, "rewards/accuracies": 0.75, "rewards/chosen": -0.24011683464050293, "rewards/margins": 0.0620410330593586, "rewards/rejected": -0.3021579086780548, "step": 955 }, { "epoch": 2.617385352498289, "grad_norm": 2.791724681854248, "learning_rate": 8.690410958904109e-07, "log_odds_chosen": 0.0506509393453598, "log_odds_ratio": -0.7903391122817993, "logits/chosen": 0.31054675579071045, "logits/rejected": 0.2567256689071655, "logps/chosen": -2.661701202392578, "logps/rejected": -2.6634554862976074, "loss": 1.0356, "nll_loss": 0.956601619720459, "rewards/accuracies": 0.625, "rewards/chosen": -0.2661701440811157, "rewards/margins": 0.00017540156841278076, "rewards/rejected": -0.2663455307483673, "step": 956 }, { "epoch": 2.620123203285421, "grad_norm": 3.165052890777588, "learning_rate": 8.68904109589041e-07, "log_odds_chosen": 0.5157440900802612, "log_odds_ratio": -0.759838342666626, "logits/chosen": 0.42283767461776733, "logits/rejected": 0.3848397135734558, "logps/chosen": -2.557162284851074, "logps/rejected": -2.8978052139282227, "loss": 0.9376, "nll_loss": 0.8616448640823364, "rewards/accuracies": 0.75, "rewards/chosen": -0.2557162046432495, "rewards/margins": 0.03406432271003723, "rewards/rejected": -0.28978055715560913, "step": 957 }, { "epoch": 2.622861054072553, "grad_norm": 2.65073299407959, "learning_rate": 8.687671232876712e-07, "log_odds_chosen": 0.31302422285079956, "log_odds_ratio": -0.5852996706962585, "logits/chosen": 0.4244846999645233, "logits/rejected": 0.3899450898170471, "logps/chosen": -2.151550769805908, "logps/rejected": -2.4212827682495117, "loss": 1.0241, "nll_loss": 0.9655408263206482, "rewards/accuracies": 0.75, "rewards/chosen": -0.21515507996082306, "rewards/margins": 0.026973195374011993, "rewards/rejected": -0.24212828278541565, "step": 958 }, { "epoch": 2.625598904859685, "grad_norm": 2.865823984146118, "learning_rate": 8.686301369863012e-07, "log_odds_chosen": 0.13160201907157898, "log_odds_ratio": -0.6416199207305908, "logits/chosen": 0.2882533073425293, "logits/rejected": 0.25835689902305603, "logps/chosen": -2.3470630645751953, "logps/rejected": -2.4548115730285645, "loss": 1.0142, "nll_loss": 0.9500041604042053, "rewards/accuracies": 0.625, "rewards/chosen": -0.234706312417984, "rewards/margins": 0.010774836875498295, "rewards/rejected": -0.24548114836215973, "step": 959 }, { "epoch": 2.628336755646817, "grad_norm": 2.1417195796966553, "learning_rate": 8.684931506849314e-07, "log_odds_chosen": 0.8488304615020752, "log_odds_ratio": -0.4743618369102478, "logits/chosen": 0.2522228956222534, "logits/rejected": 0.20797684788703918, "logps/chosen": -1.798527479171753, "logps/rejected": -2.5753278732299805, "loss": 1.0419, "nll_loss": 0.9944939613342285, "rewards/accuracies": 0.875, "rewards/chosen": -0.17985273897647858, "rewards/margins": 0.0776800587773323, "rewards/rejected": -0.2575328052043915, "step": 960 }, { "epoch": 2.631074606433949, "grad_norm": 2.4402525424957275, "learning_rate": 8.683561643835616e-07, "log_odds_chosen": 0.5218319296836853, "log_odds_ratio": -0.5197701454162598, "logits/chosen": 0.3074989914894104, "logits/rejected": 0.24569851160049438, "logps/chosen": -2.0426225662231445, "logps/rejected": -2.4416744709014893, "loss": 1.0128, "nll_loss": 0.9608286619186401, "rewards/accuracies": 0.75, "rewards/chosen": -0.20426225662231445, "rewards/margins": 0.039905183017253876, "rewards/rejected": -0.24416744709014893, "step": 961 }, { "epoch": 2.6338124572210813, "grad_norm": 4.290811061859131, "learning_rate": 8.682191780821917e-07, "log_odds_chosen": -0.45875221490859985, "log_odds_ratio": -1.0567950010299683, "logits/chosen": 0.6145577430725098, "logits/rejected": 0.6689118146896362, "logps/chosen": -3.440098285675049, "logps/rejected": -2.985734462738037, "loss": 0.9685, "nll_loss": 0.8628169298171997, "rewards/accuracies": 0.5, "rewards/chosen": -0.3440098166465759, "rewards/margins": -0.04543638974428177, "rewards/rejected": -0.29857343435287476, "step": 962 }, { "epoch": 2.6365503080082133, "grad_norm": 2.3057098388671875, "learning_rate": 8.680821917808218e-07, "log_odds_chosen": 1.1698980331420898, "log_odds_ratio": -0.3682178258895874, "logits/chosen": 0.3828542232513428, "logits/rejected": 0.3160642981529236, "logps/chosen": -2.130002737045288, "logps/rejected": -3.219540596008301, "loss": 0.9838, "nll_loss": 0.9469770789146423, "rewards/accuracies": 0.75, "rewards/chosen": -0.21300028264522552, "rewards/margins": 0.10895377397537231, "rewards/rejected": -0.32195407152175903, "step": 963 }, { "epoch": 2.6392881587953454, "grad_norm": 2.553914785385132, "learning_rate": 8.67945205479452e-07, "log_odds_chosen": 0.6477475166320801, "log_odds_ratio": -0.5448828935623169, "logits/chosen": 0.37297534942626953, "logits/rejected": 0.4022391438484192, "logps/chosen": -2.485774517059326, "logps/rejected": -3.1158580780029297, "loss": 0.9849, "nll_loss": 0.9304234981536865, "rewards/accuracies": 0.625, "rewards/chosen": -0.24857746064662933, "rewards/margins": 0.0630083680152893, "rewards/rejected": -0.31158584356307983, "step": 964 }, { "epoch": 2.642026009582478, "grad_norm": 2.487217903137207, "learning_rate": 8.678082191780822e-07, "log_odds_chosen": 0.19759482145309448, "log_odds_ratio": -0.6319392919540405, "logits/chosen": 0.21023711562156677, "logits/rejected": 0.23826217651367188, "logps/chosen": -2.313843250274658, "logps/rejected": -2.4789154529571533, "loss": 0.9313, "nll_loss": 0.8681415319442749, "rewards/accuracies": 0.625, "rewards/chosen": -0.23138433694839478, "rewards/margins": 0.016507219523191452, "rewards/rejected": -0.24789154529571533, "step": 965 }, { "epoch": 2.64476386036961, "grad_norm": 2.310295581817627, "learning_rate": 8.676712328767122e-07, "log_odds_chosen": 0.25970906019210815, "log_odds_ratio": -0.6703516244888306, "logits/chosen": 0.3932238519191742, "logits/rejected": 0.3608572483062744, "logps/chosen": -2.0224809646606445, "logps/rejected": -2.2161669731140137, "loss": 1.0014, "nll_loss": 0.9343734979629517, "rewards/accuracies": 0.375, "rewards/chosen": -0.20224809646606445, "rewards/margins": 0.019368601962924004, "rewards/rejected": -0.2216166853904724, "step": 966 }, { "epoch": 2.647501711156742, "grad_norm": 2.464001178741455, "learning_rate": 8.675342465753425e-07, "log_odds_chosen": 0.1664186716079712, "log_odds_ratio": -0.7503754496574402, "logits/chosen": 0.42763474583625793, "logits/rejected": 0.4587557017803192, "logps/chosen": -2.3389790058135986, "logps/rejected": -2.479705333709717, "loss": 0.9856, "nll_loss": 0.9105964303016663, "rewards/accuracies": 0.625, "rewards/chosen": -0.23389792442321777, "rewards/margins": 0.014072621241211891, "rewards/rejected": -0.2479705512523651, "step": 967 }, { "epoch": 2.650239561943874, "grad_norm": 2.3335683345794678, "learning_rate": 8.673972602739726e-07, "log_odds_chosen": 0.31919339299201965, "log_odds_ratio": -0.5875036716461182, "logits/chosen": 0.4536794126033783, "logits/rejected": 0.43776339292526245, "logps/chosen": -1.9960083961486816, "logps/rejected": -2.2574751377105713, "loss": 0.9787, "nll_loss": 0.9199442863464355, "rewards/accuracies": 0.625, "rewards/chosen": -0.19960087537765503, "rewards/margins": 0.02614666521549225, "rewards/rejected": -0.2257475107908249, "step": 968 }, { "epoch": 2.652977412731006, "grad_norm": 2.555187702178955, "learning_rate": 8.672602739726028e-07, "log_odds_chosen": 0.857406497001648, "log_odds_ratio": -0.38863682746887207, "logits/chosen": 0.5371229648590088, "logits/rejected": 0.4756847321987152, "logps/chosen": -2.185544013977051, "logps/rejected": -2.940309762954712, "loss": 0.8893, "nll_loss": 0.8504636883735657, "rewards/accuracies": 0.875, "rewards/chosen": -0.21855439245700836, "rewards/margins": 0.07547656446695328, "rewards/rejected": -0.29403096437454224, "step": 969 }, { "epoch": 2.6557152635181382, "grad_norm": 2.3150172233581543, "learning_rate": 8.671232876712329e-07, "log_odds_chosen": 1.597016453742981, "log_odds_ratio": -0.44225871562957764, "logits/chosen": 0.3507792055606842, "logits/rejected": 0.2437702715396881, "logps/chosen": -1.8205957412719727, "logps/rejected": -3.3449695110321045, "loss": 0.9803, "nll_loss": 0.9360660314559937, "rewards/accuracies": 0.875, "rewards/chosen": -0.18205955624580383, "rewards/margins": 0.15243737399578094, "rewards/rejected": -0.33449694514274597, "step": 970 }, { "epoch": 2.6584531143052703, "grad_norm": 2.6886887550354004, "learning_rate": 8.66986301369863e-07, "log_odds_chosen": 1.1860222816467285, "log_odds_ratio": -0.5530551075935364, "logits/chosen": 0.37157905101776123, "logits/rejected": 0.3728223145008087, "logps/chosen": -2.3878822326660156, "logps/rejected": -3.520817279815674, "loss": 0.9825, "nll_loss": 0.9271826148033142, "rewards/accuracies": 0.75, "rewards/chosen": -0.23878823220729828, "rewards/margins": 0.11329348385334015, "rewards/rejected": -0.3520817160606384, "step": 971 }, { "epoch": 2.6611909650924024, "grad_norm": 4.955580234527588, "learning_rate": 8.668493150684932e-07, "log_odds_chosen": -0.11283669620752335, "log_odds_ratio": -0.9368141293525696, "logits/chosen": 0.2981114387512207, "logits/rejected": 0.27058106660842896, "logps/chosen": -2.64099383354187, "logps/rejected": -2.464902877807617, "loss": 0.9575, "nll_loss": 0.8638471364974976, "rewards/accuracies": 0.625, "rewards/chosen": -0.2640993893146515, "rewards/margins": -0.01760908216238022, "rewards/rejected": -0.24649029970169067, "step": 972 }, { "epoch": 2.6639288158795345, "grad_norm": 3.189444065093994, "learning_rate": 8.667123287671233e-07, "log_odds_chosen": -0.694710373878479, "log_odds_ratio": -1.245139718055725, "logits/chosen": 0.2613845765590668, "logits/rejected": 0.2396291047334671, "logps/chosen": -2.730781316757202, "logps/rejected": -2.048147678375244, "loss": 1.0886, "nll_loss": 0.9640398025512695, "rewards/accuracies": 0.375, "rewards/chosen": -0.27307814359664917, "rewards/margins": -0.06826334446668625, "rewards/rejected": -0.20481479167938232, "step": 973 }, { "epoch": 2.6666666666666665, "grad_norm": 2.6315767765045166, "learning_rate": 8.665753424657534e-07, "log_odds_chosen": 0.002658367156982422, "log_odds_ratio": -0.7369111180305481, "logits/chosen": 0.46584105491638184, "logits/rejected": 0.4133967161178589, "logps/chosen": -2.2069125175476074, "logps/rejected": -2.1892073154449463, "loss": 0.9145, "nll_loss": 0.8408094644546509, "rewards/accuracies": 0.5, "rewards/chosen": -0.2206912636756897, "rewards/margins": -0.0017705298960208893, "rewards/rejected": -0.2189207375049591, "step": 974 }, { "epoch": 2.669404517453799, "grad_norm": 4.334717273712158, "learning_rate": 8.664383561643836e-07, "log_odds_chosen": -0.7596582174301147, "log_odds_ratio": -1.2679461240768433, "logits/chosen": 0.5813795328140259, "logits/rejected": 0.6460098028182983, "logps/chosen": -2.880458116531372, "logps/rejected": -2.1811652183532715, "loss": 0.947, "nll_loss": 0.8201743364334106, "rewards/accuracies": 0.125, "rewards/chosen": -0.28804582357406616, "rewards/margins": -0.06992930173873901, "rewards/rejected": -0.21811652183532715, "step": 975 }, { "epoch": 2.672142368240931, "grad_norm": 2.4464855194091797, "learning_rate": 8.663013698630136e-07, "log_odds_chosen": 0.7949857115745544, "log_odds_ratio": -0.5099427700042725, "logits/chosen": 0.3424520194530487, "logits/rejected": 0.2931642532348633, "logps/chosen": -2.1014254093170166, "logps/rejected": -2.794330358505249, "loss": 0.9885, "nll_loss": 0.9374656081199646, "rewards/accuracies": 0.75, "rewards/chosen": -0.21014253795146942, "rewards/margins": 0.06929050385951996, "rewards/rejected": -0.2794330418109894, "step": 976 }, { "epoch": 2.674880219028063, "grad_norm": 3.023178815841675, "learning_rate": 8.661643835616438e-07, "log_odds_chosen": -0.11468347907066345, "log_odds_ratio": -0.8719264268875122, "logits/chosen": 0.4646192193031311, "logits/rejected": 0.39571475982666016, "logps/chosen": -2.736218214035034, "logps/rejected": -2.5729711055755615, "loss": 1.0091, "nll_loss": 0.9219115972518921, "rewards/accuracies": 0.625, "rewards/chosen": -0.2736218273639679, "rewards/margins": -0.016324704512953758, "rewards/rejected": -0.2572970986366272, "step": 977 }, { "epoch": 2.6776180698151952, "grad_norm": 2.5318284034729004, "learning_rate": 8.66027397260274e-07, "log_odds_chosen": 0.9225607514381409, "log_odds_ratio": -0.4034842252731323, "logits/chosen": 0.3314301371574402, "logits/rejected": 0.2799351215362549, "logps/chosen": -1.586327314376831, "logps/rejected": -2.393007516860962, "loss": 0.9306, "nll_loss": 0.8902765512466431, "rewards/accuracies": 1.0, "rewards/chosen": -0.15863272547721863, "rewards/margins": 0.08066803216934204, "rewards/rejected": -0.23930075764656067, "step": 978 }, { "epoch": 2.6803559206023273, "grad_norm": 2.8500609397888184, "learning_rate": 8.658904109589041e-07, "log_odds_chosen": 0.09551683068275452, "log_odds_ratio": -0.7918504476547241, "logits/chosen": 0.532958447933197, "logits/rejected": 0.5158541202545166, "logps/chosen": -2.104473352432251, "logps/rejected": -2.169480800628662, "loss": 0.9989, "nll_loss": 0.9197614192962646, "rewards/accuracies": 0.5, "rewards/chosen": -0.21044734120368958, "rewards/margins": 0.006500745192170143, "rewards/rejected": -0.21694809198379517, "step": 979 }, { "epoch": 2.6830937713894594, "grad_norm": 3.358138084411621, "learning_rate": 8.657534246575342e-07, "log_odds_chosen": 0.6954127550125122, "log_odds_ratio": -0.6070452928543091, "logits/chosen": 0.5922069549560547, "logits/rejected": 0.631912112236023, "logps/chosen": -2.831387758255005, "logps/rejected": -3.47833514213562, "loss": 0.8497, "nll_loss": 0.7890071868896484, "rewards/accuracies": 0.75, "rewards/chosen": -0.28313878178596497, "rewards/margins": 0.06469472497701645, "rewards/rejected": -0.347833514213562, "step": 980 }, { "epoch": 2.6858316221765914, "grad_norm": 3.3139989376068115, "learning_rate": 8.656164383561644e-07, "log_odds_chosen": 0.33629053831100464, "log_odds_ratio": -0.8011802434921265, "logits/chosen": 0.7276326417922974, "logits/rejected": 0.7900798320770264, "logps/chosen": -2.883324146270752, "logps/rejected": -3.1180973052978516, "loss": 0.9791, "nll_loss": 0.898959755897522, "rewards/accuracies": 0.375, "rewards/chosen": -0.28833240270614624, "rewards/margins": 0.023477325215935707, "rewards/rejected": -0.3118097186088562, "step": 981 }, { "epoch": 2.6885694729637235, "grad_norm": 5.037440299987793, "learning_rate": 8.654794520547945e-07, "log_odds_chosen": -0.10692420601844788, "log_odds_ratio": -0.8277655243873596, "logits/chosen": 0.5545164346694946, "logits/rejected": 0.6063691973686218, "logps/chosen": -2.9484493732452393, "logps/rejected": -2.828178882598877, "loss": 0.9554, "nll_loss": 0.8726717233657837, "rewards/accuracies": 0.25, "rewards/chosen": -0.29484495520591736, "rewards/margins": -0.012027069926261902, "rewards/rejected": -0.28281790018081665, "step": 982 }, { "epoch": 2.6913073237508556, "grad_norm": 2.8158957958221436, "learning_rate": 8.653424657534246e-07, "log_odds_chosen": -0.04292864352464676, "log_odds_ratio": -0.8218511343002319, "logits/chosen": 0.38804441690444946, "logits/rejected": 0.40674325823783875, "logps/chosen": -2.5471577644348145, "logps/rejected": -2.4959311485290527, "loss": 1.0392, "nll_loss": 0.9569829106330872, "rewards/accuracies": 0.625, "rewards/chosen": -0.25471577048301697, "rewards/margins": -0.005122631788253784, "rewards/rejected": -0.24959313869476318, "step": 983 }, { "epoch": 2.6940451745379876, "grad_norm": 3.213273763656616, "learning_rate": 8.652054794520548e-07, "log_odds_chosen": 0.2352348268032074, "log_odds_ratio": -0.6835052967071533, "logits/chosen": 0.7490493059158325, "logits/rejected": 0.7197554111480713, "logps/chosen": -2.823627471923828, "logps/rejected": -3.048758029937744, "loss": 0.8632, "nll_loss": 0.794826865196228, "rewards/accuracies": 0.5, "rewards/chosen": -0.2823627293109894, "rewards/margins": 0.022513052448630333, "rewards/rejected": -0.30487579107284546, "step": 984 }, { "epoch": 2.6967830253251197, "grad_norm": 2.689863681793213, "learning_rate": 8.650684931506849e-07, "log_odds_chosen": 1.0908082723617554, "log_odds_ratio": -0.39355531334877014, "logits/chosen": 0.16035278141498566, "logits/rejected": 0.07165723294019699, "logps/chosen": -1.6876800060272217, "logps/rejected": -2.615553379058838, "loss": 0.9414, "nll_loss": 0.9020514488220215, "rewards/accuracies": 0.875, "rewards/chosen": -0.1687680035829544, "rewards/margins": 0.09278735518455505, "rewards/rejected": -0.26155534386634827, "step": 985 }, { "epoch": 2.6995208761122518, "grad_norm": 3.8738017082214355, "learning_rate": 8.649315068493151e-07, "log_odds_chosen": 0.48409587144851685, "log_odds_ratio": -0.5844541788101196, "logits/chosen": 0.5840629935264587, "logits/rejected": 0.5630532503128052, "logps/chosen": -1.9832160472869873, "logps/rejected": -2.445866346359253, "loss": 0.8899, "nll_loss": 0.8314809203147888, "rewards/accuracies": 0.625, "rewards/chosen": -0.1983216106891632, "rewards/margins": 0.04626502841711044, "rewards/rejected": -0.24458663165569305, "step": 986 }, { "epoch": 2.702258726899384, "grad_norm": 5.27170991897583, "learning_rate": 8.647945205479452e-07, "log_odds_chosen": -0.09129215776920319, "log_odds_ratio": -0.799381673336029, "logits/chosen": 0.4446397125720978, "logits/rejected": 0.41338029503822327, "logps/chosen": -2.817636013031006, "logps/rejected": -2.700221538543701, "loss": 1.0727, "nll_loss": 0.992745578289032, "rewards/accuracies": 0.5, "rewards/chosen": -0.28176361322402954, "rewards/margins": -0.011741437017917633, "rewards/rejected": -0.2700221538543701, "step": 987 }, { "epoch": 2.704996577686516, "grad_norm": 2.8507578372955322, "learning_rate": 8.646575342465753e-07, "log_odds_chosen": 0.6748219728469849, "log_odds_ratio": -0.5074089169502258, "logits/chosen": 0.5242282748222351, "logits/rejected": 0.5850450992584229, "logps/chosen": -2.1548008918762207, "logps/rejected": -2.7766332626342773, "loss": 0.949, "nll_loss": 0.8982307314872742, "rewards/accuracies": 0.875, "rewards/chosen": -0.21548010408878326, "rewards/margins": 0.06218322366476059, "rewards/rejected": -0.27766332030296326, "step": 988 }, { "epoch": 2.707734428473648, "grad_norm": 3.34256911277771, "learning_rate": 8.645205479452055e-07, "log_odds_chosen": 0.5177929401397705, "log_odds_ratio": -0.5661228895187378, "logits/chosen": 0.7056323289871216, "logits/rejected": 0.6684137582778931, "logps/chosen": -2.366246461868286, "logps/rejected": -2.7922372817993164, "loss": 0.9028, "nll_loss": 0.8462018966674805, "rewards/accuracies": 0.625, "rewards/chosen": -0.23662464320659637, "rewards/margins": 0.042599089443683624, "rewards/rejected": -0.2792237401008606, "step": 989 }, { "epoch": 2.71047227926078, "grad_norm": 2.7082698345184326, "learning_rate": 8.643835616438355e-07, "log_odds_chosen": 0.4555620551109314, "log_odds_ratio": -0.6316752433776855, "logits/chosen": 0.526195764541626, "logits/rejected": 0.530767023563385, "logps/chosen": -2.664320468902588, "logps/rejected": -3.056117057800293, "loss": 0.9394, "nll_loss": 0.8762512803077698, "rewards/accuracies": 0.75, "rewards/chosen": -0.2664320468902588, "rewards/margins": 0.03917965292930603, "rewards/rejected": -0.3056116998195648, "step": 990 }, { "epoch": 2.713210130047912, "grad_norm": 3.044213056564331, "learning_rate": 8.642465753424657e-07, "log_odds_chosen": 0.387887567281723, "log_odds_ratio": -0.6222581267356873, "logits/chosen": 0.5060203671455383, "logits/rejected": 0.527056872844696, "logps/chosen": -2.5332045555114746, "logps/rejected": -2.846407413482666, "loss": 0.9792, "nll_loss": 0.9170211553573608, "rewards/accuracies": 0.75, "rewards/chosen": -0.25332045555114746, "rewards/margins": 0.03132028877735138, "rewards/rejected": -0.28464075922966003, "step": 991 }, { "epoch": 2.7159479808350446, "grad_norm": 2.492713212966919, "learning_rate": 8.641095890410959e-07, "log_odds_chosen": 0.392572283744812, "log_odds_ratio": -0.535751461982727, "logits/chosen": 0.4895632266998291, "logits/rejected": 0.49868857860565186, "logps/chosen": -2.137777805328369, "logps/rejected": -2.478490114212036, "loss": 0.909, "nll_loss": 0.8554131984710693, "rewards/accuracies": 0.75, "rewards/chosen": -0.21377778053283691, "rewards/margins": 0.03407122567296028, "rewards/rejected": -0.2478490173816681, "step": 992 }, { "epoch": 2.7186858316221767, "grad_norm": 3.0979163646698, "learning_rate": 8.63972602739726e-07, "log_odds_chosen": 0.4868783950805664, "log_odds_ratio": -0.5475761890411377, "logits/chosen": 0.46970242261886597, "logits/rejected": 0.40767133235931396, "logps/chosen": -2.393028736114502, "logps/rejected": -2.804222583770752, "loss": 0.9095, "nll_loss": 0.8547767400741577, "rewards/accuracies": 0.75, "rewards/chosen": -0.2393028885126114, "rewards/margins": 0.041119374334812164, "rewards/rejected": -0.28042227029800415, "step": 993 }, { "epoch": 2.7214236824093088, "grad_norm": 3.7625012397766113, "learning_rate": 8.638356164383561e-07, "log_odds_chosen": -0.1384108066558838, "log_odds_ratio": -0.8040872812271118, "logits/chosen": 0.6473398804664612, "logits/rejected": 0.705264151096344, "logps/chosen": -2.588019609451294, "logps/rejected": -2.4540724754333496, "loss": 0.8794, "nll_loss": 0.7990410327911377, "rewards/accuracies": 0.625, "rewards/chosen": -0.25880196690559387, "rewards/margins": -0.013394743204116821, "rewards/rejected": -0.24540722370147705, "step": 994 }, { "epoch": 2.724161533196441, "grad_norm": 3.3127012252807617, "learning_rate": 8.636986301369863e-07, "log_odds_chosen": 0.5634958148002625, "log_odds_ratio": -0.6674911975860596, "logits/chosen": 0.5733106136322021, "logits/rejected": 0.4641822874546051, "logps/chosen": -2.355886936187744, "logps/rejected": -2.7687056064605713, "loss": 0.8311, "nll_loss": 0.7643604874610901, "rewards/accuracies": 0.625, "rewards/chosen": -0.2355886846780777, "rewards/margins": 0.04128187522292137, "rewards/rejected": -0.2768705487251282, "step": 995 }, { "epoch": 2.726899383983573, "grad_norm": 2.584993839263916, "learning_rate": 8.635616438356164e-07, "log_odds_chosen": 0.6557730436325073, "log_odds_ratio": -0.4781755208969116, "logits/chosen": 0.39750486612319946, "logits/rejected": 0.32148802280426025, "logps/chosen": -2.379929780960083, "logps/rejected": -2.970203399658203, "loss": 0.9607, "nll_loss": 0.9128590226173401, "rewards/accuracies": 0.75, "rewards/chosen": -0.23799298703670502, "rewards/margins": 0.059027329087257385, "rewards/rejected": -0.2970203161239624, "step": 996 }, { "epoch": 2.729637234770705, "grad_norm": 2.7054548263549805, "learning_rate": 8.634246575342465e-07, "log_odds_chosen": 0.18069186806678772, "log_odds_ratio": -0.6649616956710815, "logits/chosen": 0.5831355452537537, "logits/rejected": 0.6840686798095703, "logps/chosen": -1.9773943424224854, "logps/rejected": -2.086698293685913, "loss": 0.8637, "nll_loss": 0.7971667051315308, "rewards/accuracies": 0.625, "rewards/chosen": -0.19773942232131958, "rewards/margins": 0.010930401273071766, "rewards/rejected": -0.20866982638835907, "step": 997 }, { "epoch": 2.732375085557837, "grad_norm": 3.0240259170532227, "learning_rate": 8.632876712328767e-07, "log_odds_chosen": 0.449218213558197, "log_odds_ratio": -0.574155867099762, "logits/chosen": 0.7314779162406921, "logits/rejected": 0.785907506942749, "logps/chosen": -2.7228598594665527, "logps/rejected": -3.1538047790527344, "loss": 0.8506, "nll_loss": 0.7931894659996033, "rewards/accuracies": 0.5, "rewards/chosen": -0.27228599786758423, "rewards/margins": 0.043094515800476074, "rewards/rejected": -0.3153805136680603, "step": 998 }, { "epoch": 2.735112936344969, "grad_norm": 2.3848462104797363, "learning_rate": 8.631506849315068e-07, "log_odds_chosen": 0.42305976152420044, "log_odds_ratio": -0.5885823965072632, "logits/chosen": 0.49255216121673584, "logits/rejected": 0.39562609791755676, "logps/chosen": -2.1059608459472656, "logps/rejected": -2.460641860961914, "loss": 0.9874, "nll_loss": 0.9285029172897339, "rewards/accuracies": 0.75, "rewards/chosen": -0.21059608459472656, "rewards/margins": 0.03546810522675514, "rewards/rejected": -0.2460642158985138, "step": 999 }, { "epoch": 2.737850787132101, "grad_norm": 2.4115641117095947, "learning_rate": 8.63013698630137e-07, "log_odds_chosen": 0.8111875057220459, "log_odds_ratio": -0.40630292892456055, "logits/chosen": 0.6005163192749023, "logits/rejected": 0.5821516513824463, "logps/chosen": -2.1442365646362305, "logps/rejected": -2.8763742446899414, "loss": 0.9046, "nll_loss": 0.8639805912971497, "rewards/accuracies": 0.875, "rewards/chosen": -0.21442365646362305, "rewards/margins": 0.07321379333734512, "rewards/rejected": -0.28763747215270996, "step": 1000 }, { "epoch": 2.7405886379192332, "grad_norm": 2.300856113433838, "learning_rate": 8.628767123287671e-07, "log_odds_chosen": 0.7254111766815186, "log_odds_ratio": -0.45250922441482544, "logits/chosen": 0.645506739616394, "logits/rejected": 0.6845307350158691, "logps/chosen": -2.4350404739379883, "logps/rejected": -3.0983333587646484, "loss": 0.8576, "nll_loss": 0.8123853206634521, "rewards/accuracies": 0.875, "rewards/chosen": -0.24350404739379883, "rewards/margins": 0.06632924824953079, "rewards/rejected": -0.3098333179950714, "step": 1001 }, { "epoch": 2.7433264887063658, "grad_norm": 2.903780698776245, "learning_rate": 8.627397260273972e-07, "log_odds_chosen": 0.5929061770439148, "log_odds_ratio": -0.5954344272613525, "logits/chosen": 0.4460272789001465, "logits/rejected": 0.38313359022140503, "logps/chosen": -2.303154468536377, "logps/rejected": -2.804741859436035, "loss": 1.0494, "nll_loss": 0.9898495674133301, "rewards/accuracies": 0.75, "rewards/chosen": -0.2303154468536377, "rewards/margins": 0.05015873908996582, "rewards/rejected": -0.2804741859436035, "step": 1002 }, { "epoch": 2.746064339493498, "grad_norm": 2.1804070472717285, "learning_rate": 8.626027397260274e-07, "log_odds_chosen": 1.1748573780059814, "log_odds_ratio": -0.3832986056804657, "logits/chosen": 0.6857946515083313, "logits/rejected": 0.6871272921562195, "logps/chosen": -1.943784475326538, "logps/rejected": -2.9649744033813477, "loss": 0.8472, "nll_loss": 0.8088316917419434, "rewards/accuracies": 0.875, "rewards/chosen": -0.19437846541404724, "rewards/margins": 0.10211897641420364, "rewards/rejected": -0.2964974343776703, "step": 1003 }, { "epoch": 2.74880219028063, "grad_norm": 2.6783387660980225, "learning_rate": 8.624657534246575e-07, "log_odds_chosen": 0.9709850549697876, "log_odds_ratio": -0.40137237310409546, "logits/chosen": 0.7630183696746826, "logits/rejected": 0.771114706993103, "logps/chosen": -1.932978630065918, "logps/rejected": -2.7794840335845947, "loss": 0.8548, "nll_loss": 0.8146556615829468, "rewards/accuracies": 0.875, "rewards/chosen": -0.1932978630065918, "rewards/margins": 0.08465054631233215, "rewards/rejected": -0.27794840931892395, "step": 1004 }, { "epoch": 2.751540041067762, "grad_norm": 2.4283759593963623, "learning_rate": 8.623287671232876e-07, "log_odds_chosen": 0.7553892135620117, "log_odds_ratio": -0.4060037136077881, "logits/chosen": 0.478951632976532, "logits/rejected": 0.39070266485214233, "logps/chosen": -1.4603548049926758, "logps/rejected": -2.075470447540283, "loss": 1.0086, "nll_loss": 0.9679622054100037, "rewards/accuracies": 1.0, "rewards/chosen": -0.14603549242019653, "rewards/margins": 0.06151154264807701, "rewards/rejected": -0.20754702389240265, "step": 1005 }, { "epoch": 2.754277891854894, "grad_norm": 3.480381727218628, "learning_rate": 8.621917808219178e-07, "log_odds_chosen": -0.47294846177101135, "log_odds_ratio": -1.1301202774047852, "logits/chosen": 0.5784648656845093, "logits/rejected": 0.5800126194953918, "logps/chosen": -3.2151596546173096, "logps/rejected": -2.7069602012634277, "loss": 0.9804, "nll_loss": 0.867339015007019, "rewards/accuracies": 0.5, "rewards/chosen": -0.3215160071849823, "rewards/margins": -0.05081997811794281, "rewards/rejected": -0.2706960141658783, "step": 1006 }, { "epoch": 2.757015742642026, "grad_norm": 2.55287766456604, "learning_rate": 8.620547945205479e-07, "log_odds_chosen": 0.5484952330589294, "log_odds_ratio": -0.48385515809059143, "logits/chosen": 0.3044406771659851, "logits/rejected": 0.22346937656402588, "logps/chosen": -2.312114953994751, "logps/rejected": -2.805312395095825, "loss": 0.9397, "nll_loss": 0.8913014531135559, "rewards/accuracies": 0.75, "rewards/chosen": -0.23121148347854614, "rewards/margins": 0.049319759011268616, "rewards/rejected": -0.28053122758865356, "step": 1007 }, { "epoch": 2.759753593429158, "grad_norm": 2.7807185649871826, "learning_rate": 8.61917808219178e-07, "log_odds_chosen": -0.1359996795654297, "log_odds_ratio": -0.8662002086639404, "logits/chosen": 0.5615996718406677, "logits/rejected": 0.620711088180542, "logps/chosen": -2.8135552406311035, "logps/rejected": -2.6785993576049805, "loss": 0.9467, "nll_loss": 0.8600403070449829, "rewards/accuracies": 0.5, "rewards/chosen": -0.28135553002357483, "rewards/margins": -0.013495570048689842, "rewards/rejected": -0.26785996556282043, "step": 1008 }, { "epoch": 2.7624914442162902, "grad_norm": 2.9290099143981934, "learning_rate": 8.617808219178082e-07, "log_odds_chosen": 0.035514771938323975, "log_odds_ratio": -0.8093839287757874, "logits/chosen": 0.5647112131118774, "logits/rejected": 0.5688819885253906, "logps/chosen": -2.636820077896118, "logps/rejected": -2.6199097633361816, "loss": 0.9147, "nll_loss": 0.8337329626083374, "rewards/accuracies": 0.625, "rewards/chosen": -0.2636820077896118, "rewards/margins": -0.0016910294070839882, "rewards/rejected": -0.2619909644126892, "step": 1009 }, { "epoch": 2.7652292950034223, "grad_norm": 2.283517837524414, "learning_rate": 8.616438356164383e-07, "log_odds_chosen": 0.7279014587402344, "log_odds_ratio": -0.4199766516685486, "logits/chosen": 0.6035681962966919, "logits/rejected": 0.4934619963169098, "logps/chosen": -1.764410376548767, "logps/rejected": -2.40128755569458, "loss": 0.98, "nll_loss": 0.9380384087562561, "rewards/accuracies": 0.875, "rewards/chosen": -0.1764410436153412, "rewards/margins": 0.06368773430585861, "rewards/rejected": -0.2401287704706192, "step": 1010 }, { "epoch": 2.7679671457905544, "grad_norm": 2.4787211418151855, "learning_rate": 8.615068493150684e-07, "log_odds_chosen": 0.17710056900978088, "log_odds_ratio": -0.7028861045837402, "logits/chosen": 0.5961141586303711, "logits/rejected": 0.6232948303222656, "logps/chosen": -1.922186255455017, "logps/rejected": -2.025404691696167, "loss": 0.8963, "nll_loss": 0.8260100483894348, "rewards/accuracies": 0.625, "rewards/chosen": -0.1922186315059662, "rewards/margins": 0.0103218425065279, "rewards/rejected": -0.20254047214984894, "step": 1011 }, { "epoch": 2.7707049965776864, "grad_norm": 3.7271156311035156, "learning_rate": 8.613698630136986e-07, "log_odds_chosen": 0.10177057236433029, "log_odds_ratio": -0.8518058657646179, "logits/chosen": 0.5924146771430969, "logits/rejected": 0.5292119979858398, "logps/chosen": -2.6761081218719482, "logps/rejected": -2.7122435569763184, "loss": 0.9518, "nll_loss": 0.8666242957115173, "rewards/accuracies": 0.625, "rewards/chosen": -0.2676108181476593, "rewards/margins": 0.003613542765378952, "rewards/rejected": -0.27122437953948975, "step": 1012 }, { "epoch": 2.7734428473648185, "grad_norm": 2.5862298011779785, "learning_rate": 8.612328767123287e-07, "log_odds_chosen": 1.020928978919983, "log_odds_ratio": -0.41434401273727417, "logits/chosen": 0.453449010848999, "logits/rejected": 0.4574473202228546, "logps/chosen": -1.9587881565093994, "logps/rejected": -2.8062667846679688, "loss": 0.8472, "nll_loss": 0.8057297468185425, "rewards/accuracies": 0.875, "rewards/chosen": -0.19587881863117218, "rewards/margins": 0.08474786579608917, "rewards/rejected": -0.28062668442726135, "step": 1013 }, { "epoch": 2.7761806981519506, "grad_norm": 2.5217506885528564, "learning_rate": 8.610958904109589e-07, "log_odds_chosen": -0.43321147561073303, "log_odds_ratio": -1.0286016464233398, "logits/chosen": 0.30377793312072754, "logits/rejected": 0.2795957922935486, "logps/chosen": -2.4022505283355713, "logps/rejected": -2.002802848815918, "loss": 0.9873, "nll_loss": 0.8844351768493652, "rewards/accuracies": 0.25, "rewards/chosen": -0.24022504687309265, "rewards/margins": -0.03994477540254593, "rewards/rejected": -0.20028027892112732, "step": 1014 }, { "epoch": 2.7789185489390826, "grad_norm": 2.951556921005249, "learning_rate": 8.60958904109589e-07, "log_odds_chosen": 0.5910362005233765, "log_odds_ratio": -0.6920099258422852, "logits/chosen": 0.8037994503974915, "logits/rejected": 0.8423030376434326, "logps/chosen": -2.7121529579162598, "logps/rejected": -3.2393481731414795, "loss": 0.8456, "nll_loss": 0.7763622999191284, "rewards/accuracies": 0.75, "rewards/chosen": -0.2712153196334839, "rewards/margins": 0.05271951109170914, "rewards/rejected": -0.3239348232746124, "step": 1015 }, { "epoch": 2.7816563997262147, "grad_norm": 4.7630534172058105, "learning_rate": 8.608219178082191e-07, "log_odds_chosen": -0.13131192326545715, "log_odds_ratio": -1.09952712059021, "logits/chosen": 0.6368913054466248, "logits/rejected": 0.6407813429832458, "logps/chosen": -3.0186574459075928, "logps/rejected": -2.967604160308838, "loss": 1.0843, "nll_loss": 0.9743919372558594, "rewards/accuracies": 0.25, "rewards/chosen": -0.30186575651168823, "rewards/margins": -0.005105316638946533, "rewards/rejected": -0.2967604398727417, "step": 1016 }, { "epoch": 2.7843942505133468, "grad_norm": 2.971205711364746, "learning_rate": 8.606849315068493e-07, "log_odds_chosen": 0.465517520904541, "log_odds_ratio": -0.5647374987602234, "logits/chosen": 0.6446245312690735, "logits/rejected": 0.631139874458313, "logps/chosen": -2.361567497253418, "logps/rejected": -2.779057741165161, "loss": 0.9121, "nll_loss": 0.8556286096572876, "rewards/accuracies": 0.75, "rewards/chosen": -0.23615676164627075, "rewards/margins": 0.041749030351638794, "rewards/rejected": -0.27790579199790955, "step": 1017 }, { "epoch": 2.7871321013004793, "grad_norm": 3.0480780601501465, "learning_rate": 8.605479452054794e-07, "log_odds_chosen": 0.15989679098129272, "log_odds_ratio": -0.7933072447776794, "logits/chosen": 0.6424799561500549, "logits/rejected": 0.6443426012992859, "logps/chosen": -2.3859546184539795, "logps/rejected": -2.47025465965271, "loss": 1.0139, "nll_loss": 0.9346137046813965, "rewards/accuracies": 0.875, "rewards/chosen": -0.23859547078609467, "rewards/margins": 0.008430011570453644, "rewards/rejected": -0.2470254749059677, "step": 1018 }, { "epoch": 2.7898699520876113, "grad_norm": 4.289907932281494, "learning_rate": 8.604109589041095e-07, "log_odds_chosen": -0.2790045142173767, "log_odds_ratio": -1.0421741008758545, "logits/chosen": 0.5126658082008362, "logits/rejected": 0.5089046359062195, "logps/chosen": -2.9286484718322754, "logps/rejected": -2.6262388229370117, "loss": 0.98, "nll_loss": 0.8758249878883362, "rewards/accuracies": 0.5, "rewards/chosen": -0.2928648591041565, "rewards/margins": -0.0302409827709198, "rewards/rejected": -0.2626238763332367, "step": 1019 }, { "epoch": 2.7926078028747434, "grad_norm": 2.56842041015625, "learning_rate": 8.602739726027397e-07, "log_odds_chosen": 0.20676177740097046, "log_odds_ratio": -0.7505224943161011, "logits/chosen": 0.48771166801452637, "logits/rejected": 0.4424092769622803, "logps/chosen": -2.3747973442077637, "logps/rejected": -2.546220302581787, "loss": 1.0414, "nll_loss": 0.9663655161857605, "rewards/accuracies": 0.375, "rewards/chosen": -0.23747971653938293, "rewards/margins": 0.01714230515062809, "rewards/rejected": -0.25462204217910767, "step": 1020 }, { "epoch": 2.7953456536618755, "grad_norm": 2.502471923828125, "learning_rate": 8.601369863013698e-07, "log_odds_chosen": -0.10536786913871765, "log_odds_ratio": -0.8668612241744995, "logits/chosen": 0.5300357341766357, "logits/rejected": 0.43321919441223145, "logps/chosen": -2.2061569690704346, "logps/rejected": -2.0623626708984375, "loss": 1.0073, "nll_loss": 0.9206099510192871, "rewards/accuracies": 0.5, "rewards/chosen": -0.2206157147884369, "rewards/margins": -0.014379430562257767, "rewards/rejected": -0.20623628795146942, "step": 1021 }, { "epoch": 2.7980835044490076, "grad_norm": 2.282878875732422, "learning_rate": 8.599999999999999e-07, "log_odds_chosen": 1.1130331754684448, "log_odds_ratio": -0.40046000480651855, "logits/chosen": 0.39448606967926025, "logits/rejected": 0.22329077124595642, "logps/chosen": -1.2903378009796143, "logps/rejected": -2.2386176586151123, "loss": 0.9794, "nll_loss": 0.9393110871315002, "rewards/accuracies": 0.875, "rewards/chosen": -0.12903377413749695, "rewards/margins": 0.09482799470424652, "rewards/rejected": -0.22386178374290466, "step": 1022 }, { "epoch": 2.8008213552361396, "grad_norm": 3.01104736328125, "learning_rate": 8.598630136986301e-07, "log_odds_chosen": -0.00037422776222229004, "log_odds_ratio": -0.8147628307342529, "logits/chosen": 0.4368872344493866, "logits/rejected": 0.341177761554718, "logps/chosen": -2.1959228515625, "logps/rejected": -2.1431870460510254, "loss": 1.0607, "nll_loss": 0.9792224168777466, "rewards/accuracies": 0.75, "rewards/chosen": -0.21959227323532104, "rewards/margins": -0.005273574963212013, "rewards/rejected": -0.21431869268417358, "step": 1023 }, { "epoch": 2.8035592060232717, "grad_norm": 2.761443853378296, "learning_rate": 8.597260273972602e-07, "log_odds_chosen": 0.32348713278770447, "log_odds_ratio": -0.6441510915756226, "logits/chosen": 0.4692583680152893, "logits/rejected": 0.4020039439201355, "logps/chosen": -2.319366931915283, "logps/rejected": -2.57924747467041, "loss": 1.0358, "nll_loss": 0.9714240431785583, "rewards/accuracies": 0.75, "rewards/chosen": -0.23193669319152832, "rewards/margins": 0.025988029316067696, "rewards/rejected": -0.25792473554611206, "step": 1024 }, { "epoch": 2.8062970568104038, "grad_norm": 2.3141918182373047, "learning_rate": 8.595890410958903e-07, "log_odds_chosen": 0.461514949798584, "log_odds_ratio": -0.538642168045044, "logits/chosen": 0.6034901142120361, "logits/rejected": 0.6017262935638428, "logps/chosen": -2.3418803215026855, "logps/rejected": -2.760406255722046, "loss": 0.8787, "nll_loss": 0.8248553276062012, "rewards/accuracies": 0.75, "rewards/chosen": -0.234188050031662, "rewards/margins": 0.04185258224606514, "rewards/rejected": -0.27604061365127563, "step": 1025 }, { "epoch": 2.809034907597536, "grad_norm": 3.327160358428955, "learning_rate": 8.594520547945205e-07, "log_odds_chosen": 0.7664427161216736, "log_odds_ratio": -0.5672484636306763, "logits/chosen": 0.5345324277877808, "logits/rejected": 0.5530620217323303, "logps/chosen": -2.586155414581299, "logps/rejected": -3.2892942428588867, "loss": 0.9269, "nll_loss": 0.8702025413513184, "rewards/accuracies": 0.875, "rewards/chosen": -0.25861555337905884, "rewards/margins": 0.07031390815973282, "rewards/rejected": -0.32892945408821106, "step": 1026 }, { "epoch": 2.811772758384668, "grad_norm": 1.9826582670211792, "learning_rate": 8.593150684931506e-07, "log_odds_chosen": 1.4984859228134155, "log_odds_ratio": -0.3250523507595062, "logits/chosen": 0.5202863216400146, "logits/rejected": 0.44299790263175964, "logps/chosen": -1.6614389419555664, "logps/rejected": -2.9871225357055664, "loss": 0.9087, "nll_loss": 0.8761819005012512, "rewards/accuracies": 0.875, "rewards/chosen": -0.16614389419555664, "rewards/margins": 0.1325683295726776, "rewards/rejected": -0.29871222376823425, "step": 1027 }, { "epoch": 2.8145106091718004, "grad_norm": 2.2302193641662598, "learning_rate": 8.591780821917808e-07, "log_odds_chosen": 0.6469419598579407, "log_odds_ratio": -0.6333890557289124, "logits/chosen": 0.6379384398460388, "logits/rejected": 0.6056032776832581, "logps/chosen": -2.1481971740722656, "logps/rejected": -2.633744955062866, "loss": 1.0308, "nll_loss": 0.9674286246299744, "rewards/accuracies": 0.875, "rewards/chosen": -0.21481969952583313, "rewards/margins": 0.04855479672551155, "rewards/rejected": -0.2633745074272156, "step": 1028 }, { "epoch": 2.8172484599589325, "grad_norm": 4.3318963050842285, "learning_rate": 8.590410958904109e-07, "log_odds_chosen": -0.353326678276062, "log_odds_ratio": -1.0330464839935303, "logits/chosen": 0.7104918956756592, "logits/rejected": 0.7933351993560791, "logps/chosen": -3.2275094985961914, "logps/rejected": -2.8674354553222656, "loss": 0.9014, "nll_loss": 0.7980577945709229, "rewards/accuracies": 0.5, "rewards/chosen": -0.32275092601776123, "rewards/margins": -0.03600738197565079, "rewards/rejected": -0.28674355149269104, "step": 1029 }, { "epoch": 2.8199863107460645, "grad_norm": 2.7280616760253906, "learning_rate": 8.58904109589041e-07, "log_odds_chosen": 0.026068896055221558, "log_odds_ratio": -0.7894511818885803, "logits/chosen": 0.6711390018463135, "logits/rejected": 0.672906756401062, "logps/chosen": -2.467257261276245, "logps/rejected": -2.467982053756714, "loss": 0.9364, "nll_loss": 0.8574784994125366, "rewards/accuracies": 0.625, "rewards/chosen": -0.24672570824623108, "rewards/margins": 7.249042391777039e-05, "rewards/rejected": -0.24679820239543915, "step": 1030 }, { "epoch": 2.8227241615331966, "grad_norm": 2.6461949348449707, "learning_rate": 8.587671232876712e-07, "log_odds_chosen": 1.1345974206924438, "log_odds_ratio": -0.31582996249198914, "logits/chosen": 0.6873626112937927, "logits/rejected": 0.6576632261276245, "logps/chosen": -2.115438938140869, "logps/rejected": -3.1061530113220215, "loss": 0.8213, "nll_loss": 0.7897446155548096, "rewards/accuracies": 1.0, "rewards/chosen": -0.21154388785362244, "rewards/margins": 0.0990714281797409, "rewards/rejected": -0.31061530113220215, "step": 1031 }, { "epoch": 2.8254620123203287, "grad_norm": 3.0973379611968994, "learning_rate": 8.586301369863013e-07, "log_odds_chosen": 0.4954349994659424, "log_odds_ratio": -0.5926315784454346, "logits/chosen": 0.7337530255317688, "logits/rejected": 0.7603123188018799, "logps/chosen": -2.2986021041870117, "logps/rejected": -2.7216055393218994, "loss": 0.867, "nll_loss": 0.8077781200408936, "rewards/accuracies": 0.625, "rewards/chosen": -0.22986021637916565, "rewards/margins": 0.04230034723877907, "rewards/rejected": -0.2721605598926544, "step": 1032 }, { "epoch": 2.8281998631074607, "grad_norm": 2.683443069458008, "learning_rate": 8.584931506849314e-07, "log_odds_chosen": 0.4273231029510498, "log_odds_ratio": -0.5704712867736816, "logits/chosen": 0.7137688398361206, "logits/rejected": 0.7767952680587769, "logps/chosen": -2.4873383045196533, "logps/rejected": -2.867560386657715, "loss": 0.905, "nll_loss": 0.8479422330856323, "rewards/accuracies": 0.75, "rewards/chosen": -0.24873380362987518, "rewards/margins": 0.0380222387611866, "rewards/rejected": -0.2867560386657715, "step": 1033 }, { "epoch": 2.830937713894593, "grad_norm": 2.4282402992248535, "learning_rate": 8.583561643835616e-07, "log_odds_chosen": 0.6049476861953735, "log_odds_ratio": -0.5101942420005798, "logits/chosen": 0.43936461210250854, "logits/rejected": 0.3981615900993347, "logps/chosen": -1.890244483947754, "logps/rejected": -2.4172513484954834, "loss": 0.9503, "nll_loss": 0.8992396593093872, "rewards/accuracies": 0.875, "rewards/chosen": -0.1890244483947754, "rewards/margins": 0.052700694650411606, "rewards/rejected": -0.2417251467704773, "step": 1034 }, { "epoch": 2.833675564681725, "grad_norm": 3.8233954906463623, "learning_rate": 8.582191780821918e-07, "log_odds_chosen": 0.30646947026252747, "log_odds_ratio": -0.7701936960220337, "logits/chosen": 0.8477084636688232, "logits/rejected": 0.8687251210212708, "logps/chosen": -3.1250767707824707, "logps/rejected": -3.3908157348632812, "loss": 0.8401, "nll_loss": 0.7630622386932373, "rewards/accuracies": 0.625, "rewards/chosen": -0.31250765919685364, "rewards/margins": 0.0265739057213068, "rewards/rejected": -0.3390815556049347, "step": 1035 }, { "epoch": 2.836413415468857, "grad_norm": 2.4698872566223145, "learning_rate": 8.580821917808218e-07, "log_odds_chosen": 1.4877960681915283, "log_odds_ratio": -0.3524279296398163, "logits/chosen": 0.6978235244750977, "logits/rejected": 0.6820574998855591, "logps/chosen": -2.887577772140503, "logps/rejected": -4.3018927574157715, "loss": 0.8325, "nll_loss": 0.7972955107688904, "rewards/accuracies": 0.75, "rewards/chosen": -0.2887578010559082, "rewards/margins": 0.14143149554729462, "rewards/rejected": -0.430189311504364, "step": 1036 }, { "epoch": 2.839151266255989, "grad_norm": 2.540410041809082, "learning_rate": 8.57945205479452e-07, "log_odds_chosen": -0.34369897842407227, "log_odds_ratio": -1.1034570932388306, "logits/chosen": 0.5388389825820923, "logits/rejected": 0.5390333533287048, "logps/chosen": -2.882321357727051, "logps/rejected": -2.550600290298462, "loss": 1.0933, "nll_loss": 0.9829209446907043, "rewards/accuracies": 0.625, "rewards/chosen": -0.28823214769363403, "rewards/margins": -0.03317209333181381, "rewards/rejected": -0.2550600469112396, "step": 1037 }, { "epoch": 2.841889117043121, "grad_norm": 2.551405429840088, "learning_rate": 8.578082191780821e-07, "log_odds_chosen": 1.6598881483078003, "log_odds_ratio": -0.6035217642784119, "logits/chosen": 0.5959640145301819, "logits/rejected": 0.6071664094924927, "logps/chosen": -2.4518439769744873, "logps/rejected": -3.9735069274902344, "loss": 0.947, "nll_loss": 0.8866482377052307, "rewards/accuracies": 0.75, "rewards/chosen": -0.24518442153930664, "rewards/margins": 0.15216629207134247, "rewards/rejected": -0.3973506987094879, "step": 1038 }, { "epoch": 2.844626967830253, "grad_norm": 2.7500345706939697, "learning_rate": 8.576712328767122e-07, "log_odds_chosen": 0.7606407999992371, "log_odds_ratio": -0.5217347145080566, "logits/chosen": 0.37774598598480225, "logits/rejected": 0.25628864765167236, "logps/chosen": -1.9844791889190674, "logps/rejected": -2.648064613342285, "loss": 0.9624, "nll_loss": 0.9101905822753906, "rewards/accuracies": 0.75, "rewards/chosen": -0.19844791293144226, "rewards/margins": 0.0663585290312767, "rewards/rejected": -0.26480644941329956, "step": 1039 }, { "epoch": 2.847364818617385, "grad_norm": 2.592254638671875, "learning_rate": 8.575342465753424e-07, "log_odds_chosen": 0.051807403564453125, "log_odds_ratio": -0.752843976020813, "logits/chosen": 0.6326053738594055, "logits/rejected": 0.6631196737289429, "logps/chosen": -2.4049930572509766, "logps/rejected": -2.4130027294158936, "loss": 0.9352, "nll_loss": 0.8599456548690796, "rewards/accuracies": 0.5, "rewards/chosen": -0.24049930274486542, "rewards/margins": 0.0008009588345885277, "rewards/rejected": -0.2413002848625183, "step": 1040 }, { "epoch": 2.8501026694045173, "grad_norm": 2.589228868484497, "learning_rate": 8.573972602739725e-07, "log_odds_chosen": 0.6430939435958862, "log_odds_ratio": -0.5264932513237, "logits/chosen": 0.4563653767108917, "logits/rejected": 0.44634026288986206, "logps/chosen": -2.511326789855957, "logps/rejected": -3.0682976245880127, "loss": 0.9509, "nll_loss": 0.8982180953025818, "rewards/accuracies": 0.75, "rewards/chosen": -0.25113266706466675, "rewards/margins": 0.05569710209965706, "rewards/rejected": -0.3068297505378723, "step": 1041 }, { "epoch": 2.8528405201916494, "grad_norm": 2.537228584289551, "learning_rate": 8.572602739726027e-07, "log_odds_chosen": 0.6634070873260498, "log_odds_ratio": -0.6847553253173828, "logits/chosen": 0.8784370422363281, "logits/rejected": 0.9204476475715637, "logps/chosen": -2.74599027633667, "logps/rejected": -3.3849172592163086, "loss": 0.8729, "nll_loss": 0.8044322729110718, "rewards/accuracies": 0.75, "rewards/chosen": -0.2745990455150604, "rewards/margins": 0.06389268487691879, "rewards/rejected": -0.3384917378425598, "step": 1042 }, { "epoch": 2.8555783709787814, "grad_norm": 2.770759105682373, "learning_rate": 8.571232876712328e-07, "log_odds_chosen": -0.11235440522432327, "log_odds_ratio": -1.0297785997390747, "logits/chosen": 0.4229190945625305, "logits/rejected": 0.40622931718826294, "logps/chosen": -2.070054531097412, "logps/rejected": -1.9429378509521484, "loss": 1.0455, "nll_loss": 0.942504346370697, "rewards/accuracies": 0.875, "rewards/chosen": -0.20700545608997345, "rewards/margins": -0.012711677700281143, "rewards/rejected": -0.1942937821149826, "step": 1043 }, { "epoch": 2.8583162217659135, "grad_norm": 2.9721179008483887, "learning_rate": 8.569863013698629e-07, "log_odds_chosen": 0.1368640959262848, "log_odds_ratio": -0.7535684108734131, "logits/chosen": 0.49846720695495605, "logits/rejected": 0.5017769932746887, "logps/chosen": -2.760878086090088, "logps/rejected": -2.8683815002441406, "loss": 0.9874, "nll_loss": 0.912028431892395, "rewards/accuracies": 0.375, "rewards/chosen": -0.27608782052993774, "rewards/margins": 0.010750318877398968, "rewards/rejected": -0.2868381440639496, "step": 1044 }, { "epoch": 2.861054072553046, "grad_norm": 2.19633150100708, "learning_rate": 8.568493150684932e-07, "log_odds_chosen": 1.1222482919692993, "log_odds_ratio": -0.3576890230178833, "logits/chosen": 0.5644779205322266, "logits/rejected": 0.5140612125396729, "logps/chosen": -2.788597822189331, "logps/rejected": -3.790630578994751, "loss": 0.9883, "nll_loss": 0.9524929523468018, "rewards/accuracies": 0.75, "rewards/chosen": -0.27885979413986206, "rewards/margins": 0.10020327568054199, "rewards/rejected": -0.37906306982040405, "step": 1045 }, { "epoch": 2.863791923340178, "grad_norm": 2.627368927001953, "learning_rate": 8.567123287671233e-07, "log_odds_chosen": 0.07023929059505463, "log_odds_ratio": -0.713509202003479, "logits/chosen": 0.36452552676200867, "logits/rejected": 0.297606498003006, "logps/chosen": -2.175630807876587, "logps/rejected": -2.2464025020599365, "loss": 1.0137, "nll_loss": 0.9423900842666626, "rewards/accuracies": 0.625, "rewards/chosen": -0.21756309270858765, "rewards/margins": 0.007077165879309177, "rewards/rejected": -0.22464025020599365, "step": 1046 }, { "epoch": 2.86652977412731, "grad_norm": 2.475416898727417, "learning_rate": 8.565753424657534e-07, "log_odds_chosen": 0.769262969493866, "log_odds_ratio": -0.5002053380012512, "logits/chosen": 0.42793533205986023, "logits/rejected": 0.39068520069122314, "logps/chosen": -2.357117176055908, "logps/rejected": -3.0580945014953613, "loss": 0.9599, "nll_loss": 0.9099152684211731, "rewards/accuracies": 0.75, "rewards/chosen": -0.2357117235660553, "rewards/margins": 0.07009775191545486, "rewards/rejected": -0.30580949783325195, "step": 1047 }, { "epoch": 2.869267624914442, "grad_norm": 2.818511962890625, "learning_rate": 8.564383561643836e-07, "log_odds_chosen": 0.18580397963523865, "log_odds_ratio": -0.954158365726471, "logits/chosen": 0.6694743633270264, "logits/rejected": 0.7232701182365417, "logps/chosen": -2.5047826766967773, "logps/rejected": -2.6419906616210938, "loss": 0.9281, "nll_loss": 0.8327099680900574, "rewards/accuracies": 0.5, "rewards/chosen": -0.25047826766967773, "rewards/margins": 0.013720784336328506, "rewards/rejected": -0.26419907808303833, "step": 1048 }, { "epoch": 2.8720054757015743, "grad_norm": 2.856549024581909, "learning_rate": 8.563013698630138e-07, "log_odds_chosen": 0.3595101237297058, "log_odds_ratio": -0.6079407930374146, "logits/chosen": 0.736197829246521, "logits/rejected": 0.754986047744751, "logps/chosen": -2.4832019805908203, "logps/rejected": -2.7731504440307617, "loss": 0.8505, "nll_loss": 0.78969407081604, "rewards/accuracies": 0.5, "rewards/chosen": -0.24832019209861755, "rewards/margins": 0.028994876891374588, "rewards/rejected": -0.27731505036354065, "step": 1049 }, { "epoch": 2.8747433264887063, "grad_norm": 2.5188417434692383, "learning_rate": 8.561643835616438e-07, "log_odds_chosen": 0.1128322035074234, "log_odds_ratio": -0.6976628303527832, "logits/chosen": 0.7543500661849976, "logits/rejected": 0.8002820014953613, "logps/chosen": -2.6408958435058594, "logps/rejected": -2.7592196464538574, "loss": 0.9596, "nll_loss": 0.8897947669029236, "rewards/accuracies": 0.5, "rewards/chosen": -0.26408958435058594, "rewards/margins": 0.011832382529973984, "rewards/rejected": -0.2759220004081726, "step": 1050 }, { "epoch": 2.8774811772758384, "grad_norm": 2.2699105739593506, "learning_rate": 8.56027397260274e-07, "log_odds_chosen": 0.6943180561065674, "log_odds_ratio": -0.5144071578979492, "logits/chosen": 0.4891796112060547, "logits/rejected": 0.5364794135093689, "logps/chosen": -2.3327784538269043, "logps/rejected": -2.9807138442993164, "loss": 0.9103, "nll_loss": 0.8588382601737976, "rewards/accuracies": 0.75, "rewards/chosen": -0.2332778424024582, "rewards/margins": 0.06479353457689285, "rewards/rejected": -0.29807138442993164, "step": 1051 }, { "epoch": 2.8802190280629705, "grad_norm": 2.1306166648864746, "learning_rate": 8.558904109589041e-07, "log_odds_chosen": 0.6514631509780884, "log_odds_ratio": -0.5513622760772705, "logits/chosen": 0.41142576932907104, "logits/rejected": 0.36440664529800415, "logps/chosen": -1.8764945268630981, "logps/rejected": -2.4634222984313965, "loss": 0.9822, "nll_loss": 0.9270501136779785, "rewards/accuracies": 0.5, "rewards/chosen": -0.1876494586467743, "rewards/margins": 0.05869276076555252, "rewards/rejected": -0.2463422268629074, "step": 1052 }, { "epoch": 2.8829568788501025, "grad_norm": 2.17049503326416, "learning_rate": 8.557534246575342e-07, "log_odds_chosen": 0.7342615723609924, "log_odds_ratio": -0.44948482513427734, "logits/chosen": 0.7899267673492432, "logits/rejected": 0.7421112060546875, "logps/chosen": -1.7786794900894165, "logps/rejected": -2.4298601150512695, "loss": 0.8039, "nll_loss": 0.7589136958122253, "rewards/accuracies": 0.875, "rewards/chosen": -0.17786796391010284, "rewards/margins": 0.06511805951595306, "rewards/rejected": -0.2429860234260559, "step": 1053 }, { "epoch": 2.8856947296372346, "grad_norm": 2.3303709030151367, "learning_rate": 8.556164383561644e-07, "log_odds_chosen": 0.8838670253753662, "log_odds_ratio": -0.46768155694007874, "logits/chosen": 0.42424190044403076, "logits/rejected": 0.3655731976032257, "logps/chosen": -2.2770299911499023, "logps/rejected": -3.098503828048706, "loss": 0.9444, "nll_loss": 0.8975893259048462, "rewards/accuracies": 0.75, "rewards/chosen": -0.2277030050754547, "rewards/margins": 0.08214738965034485, "rewards/rejected": -0.30985039472579956, "step": 1054 }, { "epoch": 2.888432580424367, "grad_norm": 2.2354698181152344, "learning_rate": 8.554794520547945e-07, "log_odds_chosen": 0.7445961236953735, "log_odds_ratio": -0.4674264192581177, "logits/chosen": 0.4448454976081848, "logits/rejected": 0.3661119341850281, "logps/chosen": -1.737226128578186, "logps/rejected": -2.3726837635040283, "loss": 0.9772, "nll_loss": 0.9304128289222717, "rewards/accuracies": 0.75, "rewards/chosen": -0.17372262477874756, "rewards/margins": 0.06354574859142303, "rewards/rejected": -0.2372683584690094, "step": 1055 }, { "epoch": 2.891170431211499, "grad_norm": 2.6994540691375732, "learning_rate": 8.553424657534247e-07, "log_odds_chosen": 0.27417507767677307, "log_odds_ratio": -0.6829997301101685, "logits/chosen": 0.546266496181488, "logits/rejected": 0.5984732508659363, "logps/chosen": -2.6012794971466064, "logps/rejected": -2.8523051738739014, "loss": 0.9561, "nll_loss": 0.887836217880249, "rewards/accuracies": 0.75, "rewards/chosen": -0.2601279318332672, "rewards/margins": 0.02510259859263897, "rewards/rejected": -0.2852305471897125, "step": 1056 }, { "epoch": 2.8939082819986313, "grad_norm": 3.5589845180511475, "learning_rate": 8.552054794520548e-07, "log_odds_chosen": 0.20938722789287567, "log_odds_ratio": -0.8413891196250916, "logits/chosen": 0.5097506642341614, "logits/rejected": 0.4638121426105499, "logps/chosen": -2.774256706237793, "logps/rejected": -2.8934566974639893, "loss": 0.9367, "nll_loss": 0.8526042699813843, "rewards/accuracies": 0.625, "rewards/chosen": -0.27742570638656616, "rewards/margins": 0.011919988319277763, "rewards/rejected": -0.2893456816673279, "step": 1057 }, { "epoch": 2.8966461327857633, "grad_norm": 2.833613395690918, "learning_rate": 8.550684931506849e-07, "log_odds_chosen": 0.010178446769714355, "log_odds_ratio": -0.81451416015625, "logits/chosen": 0.6693177223205566, "logits/rejected": 0.748717725276947, "logps/chosen": -2.665689468383789, "logps/rejected": -2.7063372135162354, "loss": 0.8957, "nll_loss": 0.8142235279083252, "rewards/accuracies": 0.625, "rewards/chosen": -0.26656895875930786, "rewards/margins": 0.004064779728651047, "rewards/rejected": -0.270633727312088, "step": 1058 }, { "epoch": 2.8993839835728954, "grad_norm": 1.9393855333328247, "learning_rate": 8.549315068493151e-07, "log_odds_chosen": 0.8506313562393188, "log_odds_ratio": -0.5022950172424316, "logits/chosen": 0.2353234887123108, "logits/rejected": 0.23980097472667694, "logps/chosen": -1.8702144622802734, "logps/rejected": -2.669926404953003, "loss": 1.007, "nll_loss": 0.9567842483520508, "rewards/accuracies": 0.75, "rewards/chosen": -0.1870214343070984, "rewards/margins": 0.07997120171785355, "rewards/rejected": -0.2669926583766937, "step": 1059 }, { "epoch": 2.9021218343600275, "grad_norm": 3.7858688831329346, "learning_rate": 8.547945205479452e-07, "log_odds_chosen": -0.28592583537101746, "log_odds_ratio": -0.9221882224082947, "logits/chosen": 0.5473705530166626, "logits/rejected": 0.5081375241279602, "logps/chosen": -2.551701068878174, "logps/rejected": -2.265878677368164, "loss": 0.9681, "nll_loss": 0.8758628368377686, "rewards/accuracies": 0.375, "rewards/chosen": -0.2551701068878174, "rewards/margins": -0.028582250699400902, "rewards/rejected": -0.22658784687519073, "step": 1060 }, { "epoch": 2.9048596851471595, "grad_norm": 2.9938061237335205, "learning_rate": 8.546575342465753e-07, "log_odds_chosen": 0.6563686728477478, "log_odds_ratio": -0.5946749448776245, "logits/chosen": 0.36356696486473083, "logits/rejected": 0.36507517099380493, "logps/chosen": -2.323338031768799, "logps/rejected": -2.844797134399414, "loss": 1.0258, "nll_loss": 0.9663238525390625, "rewards/accuracies": 0.625, "rewards/chosen": -0.23233380913734436, "rewards/margins": 0.05214590206742287, "rewards/rejected": -0.28447970747947693, "step": 1061 }, { "epoch": 2.9075975359342916, "grad_norm": 3.676806926727295, "learning_rate": 8.545205479452055e-07, "log_odds_chosen": -0.11710014939308167, "log_odds_ratio": -0.8442705273628235, "logits/chosen": 0.4684092700481415, "logits/rejected": 0.4089042544364929, "logps/chosen": -2.392293930053711, "logps/rejected": -2.2669167518615723, "loss": 0.9681, "nll_loss": 0.8836603760719299, "rewards/accuracies": 0.625, "rewards/chosen": -0.23922939598560333, "rewards/margins": -0.012537715956568718, "rewards/rejected": -0.22669167816638947, "step": 1062 }, { "epoch": 2.9103353867214237, "grad_norm": 2.6452476978302, "learning_rate": 8.543835616438357e-07, "log_odds_chosen": 1.1555218696594238, "log_odds_ratio": -0.373553991317749, "logits/chosen": 0.8408747315406799, "logits/rejected": 0.8462789058685303, "logps/chosen": -2.0082883834838867, "logps/rejected": -3.0670125484466553, "loss": 0.8162, "nll_loss": 0.7788896560668945, "rewards/accuracies": 1.0, "rewards/chosen": -0.20082882046699524, "rewards/margins": 0.10587240755558014, "rewards/rejected": -0.3067012429237366, "step": 1063 }, { "epoch": 2.9130732375085557, "grad_norm": 2.1668450832366943, "learning_rate": 8.542465753424657e-07, "log_odds_chosen": 1.0260367393493652, "log_odds_ratio": -0.40039244294166565, "logits/chosen": 0.6129329204559326, "logits/rejected": 0.561559796333313, "logps/chosen": -1.596630573272705, "logps/rejected": -2.5171689987182617, "loss": 0.882, "nll_loss": 0.8419831395149231, "rewards/accuracies": 1.0, "rewards/chosen": -0.15966303646564484, "rewards/margins": 0.0920538604259491, "rewards/rejected": -0.2517169117927551, "step": 1064 }, { "epoch": 2.915811088295688, "grad_norm": 2.312100410461426, "learning_rate": 8.541095890410959e-07, "log_odds_chosen": 0.8804411292076111, "log_odds_ratio": -0.4254527688026428, "logits/chosen": 0.5038115382194519, "logits/rejected": 0.45398950576782227, "logps/chosen": -1.5070058107376099, "logps/rejected": -2.265648365020752, "loss": 0.9537, "nll_loss": 0.9111485481262207, "rewards/accuracies": 0.875, "rewards/chosen": -0.15070059895515442, "rewards/margins": 0.0758642703294754, "rewards/rejected": -0.22656485438346863, "step": 1065 }, { "epoch": 2.91854893908282, "grad_norm": 2.2963128089904785, "learning_rate": 8.53972602739726e-07, "log_odds_chosen": 1.3439481258392334, "log_odds_ratio": -0.40419769287109375, "logits/chosen": 0.9089482426643372, "logits/rejected": 0.8453500866889954, "logps/chosen": -2.388052225112915, "logps/rejected": -3.6148738861083984, "loss": 0.7955, "nll_loss": 0.7550705671310425, "rewards/accuracies": 0.875, "rewards/chosen": -0.23880524933338165, "rewards/margins": 0.12268217653036118, "rewards/rejected": -0.36148738861083984, "step": 1066 }, { "epoch": 2.921286789869952, "grad_norm": 2.6741323471069336, "learning_rate": 8.538356164383561e-07, "log_odds_chosen": 0.830187201499939, "log_odds_ratio": -0.5800446271896362, "logits/chosen": 0.6945044994354248, "logits/rejected": 0.5915518999099731, "logps/chosen": -2.253404140472412, "logps/rejected": -3.0014376640319824, "loss": 0.9707, "nll_loss": 0.9127404689788818, "rewards/accuracies": 0.75, "rewards/chosen": -0.22534041106700897, "rewards/margins": 0.07480334490537643, "rewards/rejected": -0.3001437783241272, "step": 1067 }, { "epoch": 2.924024640657084, "grad_norm": 2.706080198287964, "learning_rate": 8.536986301369863e-07, "log_odds_chosen": 0.32275378704071045, "log_odds_ratio": -0.6391281485557556, "logits/chosen": 0.527419924736023, "logits/rejected": 0.5811318755149841, "logps/chosen": -2.194782018661499, "logps/rejected": -2.434293746948242, "loss": 0.9052, "nll_loss": 0.8412863612174988, "rewards/accuracies": 0.625, "rewards/chosen": -0.21947820484638214, "rewards/margins": 0.023951176553964615, "rewards/rejected": -0.24342939257621765, "step": 1068 }, { "epoch": 2.926762491444216, "grad_norm": 2.323302745819092, "learning_rate": 8.535616438356164e-07, "log_odds_chosen": 0.2769085764884949, "log_odds_ratio": -0.5815432667732239, "logits/chosen": 0.3925849497318268, "logits/rejected": 0.33116886019706726, "logps/chosen": -2.150292158126831, "logps/rejected": -2.384997844696045, "loss": 1.1017, "nll_loss": 1.043563723564148, "rewards/accuracies": 0.75, "rewards/chosen": -0.21502920985221863, "rewards/margins": 0.023470580577850342, "rewards/rejected": -0.23849979043006897, "step": 1069 }, { "epoch": 2.929500342231348, "grad_norm": 2.2754738330841064, "learning_rate": 8.534246575342465e-07, "log_odds_chosen": 0.41935738921165466, "log_odds_ratio": -0.5682578086853027, "logits/chosen": 0.4273279905319214, "logits/rejected": 0.39657193422317505, "logps/chosen": -2.373955011367798, "logps/rejected": -2.7169456481933594, "loss": 0.9056, "nll_loss": 0.848802924156189, "rewards/accuracies": 0.5, "rewards/chosen": -0.2373955100774765, "rewards/margins": 0.03429907560348511, "rewards/rejected": -0.2716945707798004, "step": 1070 }, { "epoch": 2.9322381930184807, "grad_norm": 2.305802822113037, "learning_rate": 8.532876712328767e-07, "log_odds_chosen": 0.5564114451408386, "log_odds_ratio": -0.5249685645103455, "logits/chosen": 0.41824448108673096, "logits/rejected": 0.34888315200805664, "logps/chosen": -2.8256642818450928, "logps/rejected": -3.322047710418701, "loss": 1.0199, "nll_loss": 0.9674186706542969, "rewards/accuracies": 0.625, "rewards/chosen": -0.2825664281845093, "rewards/margins": 0.04963833838701248, "rewards/rejected": -0.33220475912094116, "step": 1071 }, { "epoch": 2.9349760438056127, "grad_norm": 2.588563919067383, "learning_rate": 8.531506849315068e-07, "log_odds_chosen": 1.293926477432251, "log_odds_ratio": -0.4748861491680145, "logits/chosen": 0.4147610366344452, "logits/rejected": 0.30232229828834534, "logps/chosen": -1.7820196151733398, "logps/rejected": -2.947521448135376, "loss": 0.9699, "nll_loss": 0.9223806858062744, "rewards/accuracies": 0.875, "rewards/chosen": -0.17820194363594055, "rewards/margins": 0.11655019223690033, "rewards/rejected": -0.2947521507740021, "step": 1072 }, { "epoch": 2.937713894592745, "grad_norm": 2.0782711505889893, "learning_rate": 8.53013698630137e-07, "log_odds_chosen": 0.39132487773895264, "log_odds_ratio": -0.5557683706283569, "logits/chosen": 0.4814260005950928, "logits/rejected": 0.4665309488773346, "logps/chosen": -2.1632981300354004, "logps/rejected": -2.5015695095062256, "loss": 0.9423, "nll_loss": 0.8867093920707703, "rewards/accuracies": 0.75, "rewards/chosen": -0.21632981300354004, "rewards/margins": 0.03382713347673416, "rewards/rejected": -0.2501569390296936, "step": 1073 }, { "epoch": 2.940451745379877, "grad_norm": 2.422576427459717, "learning_rate": 8.528767123287671e-07, "log_odds_chosen": 0.8537580966949463, "log_odds_ratio": -0.4726113975048065, "logits/chosen": 0.6891146302223206, "logits/rejected": 0.712322473526001, "logps/chosen": -2.0687856674194336, "logps/rejected": -2.874682903289795, "loss": 0.9048, "nll_loss": 0.8575875759124756, "rewards/accuracies": 0.875, "rewards/chosen": -0.20687858760356903, "rewards/margins": 0.08058971166610718, "rewards/rejected": -0.287468284368515, "step": 1074 }, { "epoch": 2.943189596167009, "grad_norm": 2.325932741165161, "learning_rate": 8.527397260273972e-07, "log_odds_chosen": 0.3549419641494751, "log_odds_ratio": -0.645366370677948, "logits/chosen": 0.47286033630371094, "logits/rejected": 0.3939899504184723, "logps/chosen": -2.046754837036133, "logps/rejected": -2.325840950012207, "loss": 1.0198, "nll_loss": 0.9552291035652161, "rewards/accuracies": 0.625, "rewards/chosen": -0.20467546582221985, "rewards/margins": 0.02790861949324608, "rewards/rejected": -0.23258408904075623, "step": 1075 }, { "epoch": 2.945927446954141, "grad_norm": 2.8783297538757324, "learning_rate": 8.526027397260274e-07, "log_odds_chosen": 0.24262453615665436, "log_odds_ratio": -0.7753088474273682, "logits/chosen": 0.5370913147926331, "logits/rejected": 0.5133668780326843, "logps/chosen": -2.064589738845825, "logps/rejected": -2.3123061656951904, "loss": 0.9666, "nll_loss": 0.88902348279953, "rewards/accuracies": 0.375, "rewards/chosen": -0.20645898580551147, "rewards/margins": 0.024771636351943016, "rewards/rejected": -0.23123063147068024, "step": 1076 }, { "epoch": 2.948665297741273, "grad_norm": 2.0561861991882324, "learning_rate": 8.524657534246575e-07, "log_odds_chosen": 1.4100645780563354, "log_odds_ratio": -0.3265669643878937, "logits/chosen": 0.32825353741645813, "logits/rejected": 0.2012401819229126, "logps/chosen": -1.9125452041625977, "logps/rejected": -3.1774134635925293, "loss": 0.9939, "nll_loss": 0.961246907711029, "rewards/accuracies": 0.875, "rewards/chosen": -0.19125454127788544, "rewards/margins": 0.12648680806159973, "rewards/rejected": -0.317741334438324, "step": 1077 }, { "epoch": 2.951403148528405, "grad_norm": 2.305495262145996, "learning_rate": 8.523287671232876e-07, "log_odds_chosen": 0.24065697193145752, "log_odds_ratio": -0.6345599889755249, "logits/chosen": 0.5923588275909424, "logits/rejected": 0.5818220376968384, "logps/chosen": -2.1036736965179443, "logps/rejected": -2.2937116622924805, "loss": 0.9007, "nll_loss": 0.8372423648834229, "rewards/accuracies": 0.75, "rewards/chosen": -0.2103673666715622, "rewards/margins": 0.019003797322511673, "rewards/rejected": -0.22937114536762238, "step": 1078 }, { "epoch": 2.954140999315537, "grad_norm": 3.63301682472229, "learning_rate": 8.521917808219178e-07, "log_odds_chosen": -0.6122699975967407, "log_odds_ratio": -1.3747786283493042, "logits/chosen": 0.7846870422363281, "logits/rejected": 0.8255990147590637, "logps/chosen": -3.392050266265869, "logps/rejected": -2.745906352996826, "loss": 1.0374, "nll_loss": 0.8999414443969727, "rewards/accuracies": 0.625, "rewards/chosen": -0.3392050266265869, "rewards/margins": -0.06461440771818161, "rewards/rejected": -0.2745906114578247, "step": 1079 }, { "epoch": 2.9568788501026693, "grad_norm": 3.3931777477264404, "learning_rate": 8.52054794520548e-07, "log_odds_chosen": 1.0229946374893188, "log_odds_ratio": -0.4525204300880432, "logits/chosen": 0.7511001825332642, "logits/rejected": 0.7765413522720337, "logps/chosen": -2.2070465087890625, "logps/rejected": -3.1058120727539062, "loss": 0.8494, "nll_loss": 0.8041856288909912, "rewards/accuracies": 0.625, "rewards/chosen": -0.22070464491844177, "rewards/margins": 0.08987656980752945, "rewards/rejected": -0.3105812072753906, "step": 1080 }, { "epoch": 2.9596167008898018, "grad_norm": 3.056647777557373, "learning_rate": 8.51917808219178e-07, "log_odds_chosen": 0.4380301237106323, "log_odds_ratio": -0.6121383905410767, "logits/chosen": 0.5536503791809082, "logits/rejected": 0.550029993057251, "logps/chosen": -2.6530399322509766, "logps/rejected": -3.060065269470215, "loss": 0.9619, "nll_loss": 0.90070641040802, "rewards/accuracies": 0.5, "rewards/chosen": -0.26530396938323975, "rewards/margins": 0.04070257395505905, "rewards/rejected": -0.3060065507888794, "step": 1081 }, { "epoch": 2.962354551676934, "grad_norm": 5.425618648529053, "learning_rate": 8.517808219178082e-07, "log_odds_chosen": 0.1945626437664032, "log_odds_ratio": -0.942165732383728, "logits/chosen": 0.7797508835792542, "logits/rejected": 0.8413451910018921, "logps/chosen": -2.9852938652038574, "logps/rejected": -3.1226303577423096, "loss": 0.9295, "nll_loss": 0.8352998495101929, "rewards/accuracies": 0.75, "rewards/chosen": -0.29852938652038574, "rewards/margins": 0.013733655214309692, "rewards/rejected": -0.3122630715370178, "step": 1082 }, { "epoch": 2.965092402464066, "grad_norm": 2.372683525085449, "learning_rate": 8.516438356164383e-07, "log_odds_chosen": 0.6122283935546875, "log_odds_ratio": -0.5333516597747803, "logits/chosen": 0.6105042099952698, "logits/rejected": 0.6169289350509644, "logps/chosen": -2.5825984477996826, "logps/rejected": -3.1293842792510986, "loss": 0.8404, "nll_loss": 0.7870664000511169, "rewards/accuracies": 0.625, "rewards/chosen": -0.2582598626613617, "rewards/margins": 0.05467856675386429, "rewards/rejected": -0.3129384517669678, "step": 1083 }, { "epoch": 2.967830253251198, "grad_norm": 3.0081684589385986, "learning_rate": 8.515068493150684e-07, "log_odds_chosen": 1.215430498123169, "log_odds_ratio": -0.3352040648460388, "logits/chosen": 0.6723618507385254, "logits/rejected": 0.6839698553085327, "logps/chosen": -2.417545795440674, "logps/rejected": -3.5163400173187256, "loss": 0.9458, "nll_loss": 0.9122369289398193, "rewards/accuracies": 0.875, "rewards/chosen": -0.24175457656383514, "rewards/margins": 0.10987941920757294, "rewards/rejected": -0.35163402557373047, "step": 1084 }, { "epoch": 2.97056810403833, "grad_norm": 2.832760810852051, "learning_rate": 8.513698630136986e-07, "log_odds_chosen": 0.5411423444747925, "log_odds_ratio": -0.5834000110626221, "logits/chosen": 0.4779115617275238, "logits/rejected": 0.48723745346069336, "logps/chosen": -2.1503429412841797, "logps/rejected": -2.5979456901550293, "loss": 1.0096, "nll_loss": 0.9512637853622437, "rewards/accuracies": 0.75, "rewards/chosen": -0.21503427624702454, "rewards/margins": 0.04476030170917511, "rewards/rejected": -0.25979459285736084, "step": 1085 }, { "epoch": 2.973305954825462, "grad_norm": 3.366335391998291, "learning_rate": 8.512328767123287e-07, "log_odds_chosen": 0.7391207218170166, "log_odds_ratio": -0.6785234212875366, "logits/chosen": 0.31760385632514954, "logits/rejected": 0.3088700473308563, "logps/chosen": -2.109473466873169, "logps/rejected": -2.632366418838501, "loss": 1.0236, "nll_loss": 0.9557691216468811, "rewards/accuracies": 0.75, "rewards/chosen": -0.21094736456871033, "rewards/margins": 0.052289292216300964, "rewards/rejected": -0.2632366418838501, "step": 1086 }, { "epoch": 2.976043805612594, "grad_norm": 3.450321674346924, "learning_rate": 8.510958904109589e-07, "log_odds_chosen": 0.7031643986701965, "log_odds_ratio": -0.4798179268836975, "logits/chosen": 0.849448561668396, "logits/rejected": 0.8721650838851929, "logps/chosen": -2.619922637939453, "logps/rejected": -3.2810401916503906, "loss": 0.8935, "nll_loss": 0.8455407619476318, "rewards/accuracies": 0.875, "rewards/chosen": -0.26199227571487427, "rewards/margins": 0.06611177325248718, "rewards/rejected": -0.32810401916503906, "step": 1087 }, { "epoch": 2.9787816563997263, "grad_norm": 3.3048903942108154, "learning_rate": 8.50958904109589e-07, "log_odds_chosen": 0.47659024596214294, "log_odds_ratio": -0.6614441275596619, "logits/chosen": 0.741132378578186, "logits/rejected": 0.7202996611595154, "logps/chosen": -2.54172945022583, "logps/rejected": -2.9409220218658447, "loss": 0.8613, "nll_loss": 0.7951462268829346, "rewards/accuracies": 0.875, "rewards/chosen": -0.2541729211807251, "rewards/margins": 0.03991929069161415, "rewards/rejected": -0.29409223794937134, "step": 1088 }, { "epoch": 2.9815195071868583, "grad_norm": 2.6336801052093506, "learning_rate": 8.508219178082191e-07, "log_odds_chosen": 1.1770315170288086, "log_odds_ratio": -0.40374279022216797, "logits/chosen": 0.4725770056247711, "logits/rejected": 0.43776997923851013, "logps/chosen": -2.162334442138672, "logps/rejected": -3.2745041847229004, "loss": 1.0262, "nll_loss": 0.9858729839324951, "rewards/accuracies": 0.875, "rewards/chosen": -0.21623343229293823, "rewards/margins": 0.11121699959039688, "rewards/rejected": -0.3274504244327545, "step": 1089 }, { "epoch": 2.9842573579739904, "grad_norm": 2.3342723846435547, "learning_rate": 8.506849315068493e-07, "log_odds_chosen": 0.25901520252227783, "log_odds_ratio": -0.6900037527084351, "logits/chosen": 0.6200411319732666, "logits/rejected": 0.5960046648979187, "logps/chosen": -1.9306554794311523, "logps/rejected": -2.1505579948425293, "loss": 0.9067, "nll_loss": 0.8376786112785339, "rewards/accuracies": 0.75, "rewards/chosen": -0.19306553900241852, "rewards/margins": 0.021990273147821426, "rewards/rejected": -0.21505582332611084, "step": 1090 }, { "epoch": 2.9869952087611225, "grad_norm": 3.2861874103546143, "learning_rate": 8.505479452054794e-07, "log_odds_chosen": 0.371949702501297, "log_odds_ratio": -0.7060700058937073, "logits/chosen": 0.5691366791725159, "logits/rejected": 0.5285147428512573, "logps/chosen": -1.8071162700653076, "logps/rejected": -2.1572630405426025, "loss": 0.9457, "nll_loss": 0.875054121017456, "rewards/accuracies": 0.75, "rewards/chosen": -0.18071162700653076, "rewards/margins": 0.03501468896865845, "rewards/rejected": -0.21572630107402802, "step": 1091 }, { "epoch": 2.9897330595482545, "grad_norm": 2.8531880378723145, "learning_rate": 8.504109589041095e-07, "log_odds_chosen": -0.09886468201875687, "log_odds_ratio": -0.8067007064819336, "logits/chosen": 0.6053104400634766, "logits/rejected": 0.5694512724876404, "logps/chosen": -2.608085870742798, "logps/rejected": -2.4884815216064453, "loss": 0.9907, "nll_loss": 0.9100794792175293, "rewards/accuracies": 0.375, "rewards/chosen": -0.2608086168766022, "rewards/margins": -0.011960442177951336, "rewards/rejected": -0.24884817004203796, "step": 1092 }, { "epoch": 2.9924709103353866, "grad_norm": 2.7960853576660156, "learning_rate": 8.502739726027397e-07, "log_odds_chosen": 1.167872428894043, "log_odds_ratio": -0.5832054018974304, "logits/chosen": 0.8685106039047241, "logits/rejected": 0.9679983854293823, "logps/chosen": -2.0199010372161865, "logps/rejected": -3.0282187461853027, "loss": 0.8649, "nll_loss": 0.8065908551216125, "rewards/accuracies": 0.75, "rewards/chosen": -0.20199009776115417, "rewards/margins": 0.10083179920911789, "rewards/rejected": -0.3028218746185303, "step": 1093 }, { "epoch": 2.9952087611225187, "grad_norm": 2.614630937576294, "learning_rate": 8.501369863013699e-07, "log_odds_chosen": 0.4323228597640991, "log_odds_ratio": -0.5226815938949585, "logits/chosen": 0.5457054376602173, "logits/rejected": 0.548464834690094, "logps/chosen": -2.694577693939209, "logps/rejected": -3.085667371749878, "loss": 0.8971, "nll_loss": 0.8448187112808228, "rewards/accuracies": 0.75, "rewards/chosen": -0.26945775747299194, "rewards/margins": 0.0391089953482151, "rewards/rejected": -0.30856674909591675, "step": 1094 }, { "epoch": 2.9979466119096507, "grad_norm": 2.352935552597046, "learning_rate": 8.499999999999999e-07, "log_odds_chosen": 0.617384672164917, "log_odds_ratio": -0.6278592944145203, "logits/chosen": 0.5149610638618469, "logits/rejected": 0.44373267889022827, "logps/chosen": -2.6093835830688477, "logps/rejected": -3.181081771850586, "loss": 0.9667, "nll_loss": 0.9039394855499268, "rewards/accuracies": 0.75, "rewards/chosen": -0.2609383463859558, "rewards/margins": 0.05716982111334801, "rewards/rejected": -0.3181081712245941, "step": 1095 }, { "epoch": 3.0006844626967832, "grad_norm": 2.6116104125976562, "learning_rate": 8.498630136986301e-07, "log_odds_chosen": 0.4096830189228058, "log_odds_ratio": -0.582655131816864, "logits/chosen": 0.8036462664604187, "logits/rejected": 0.8236038088798523, "logps/chosen": -2.428387403488159, "logps/rejected": -2.786043167114258, "loss": 0.9023, "nll_loss": 0.8439987301826477, "rewards/accuracies": 0.75, "rewards/chosen": -0.2428387552499771, "rewards/margins": 0.03576556593179703, "rewards/rejected": -0.27860432863235474, "step": 1096 }, { "epoch": 3.0034223134839153, "grad_norm": 2.8754587173461914, "learning_rate": 8.497260273972602e-07, "log_odds_chosen": 0.4936824142932892, "log_odds_ratio": -0.7022282481193542, "logits/chosen": 0.9135815501213074, "logits/rejected": 0.945670485496521, "logps/chosen": -2.536090850830078, "logps/rejected": -2.9459140300750732, "loss": 0.8916, "nll_loss": 0.8214091062545776, "rewards/accuracies": 0.5, "rewards/chosen": -0.2536090910434723, "rewards/margins": 0.04098230600357056, "rewards/rejected": -0.29459136724472046, "step": 1097 }, { "epoch": 3.0061601642710474, "grad_norm": 2.488737106323242, "learning_rate": 8.495890410958903e-07, "log_odds_chosen": 0.8355370759963989, "log_odds_ratio": -0.6116797924041748, "logits/chosen": 0.45538330078125, "logits/rejected": 0.4131176471710205, "logps/chosen": -2.047895908355713, "logps/rejected": -2.7134904861450195, "loss": 0.8262, "nll_loss": 0.7650725245475769, "rewards/accuracies": 0.625, "rewards/chosen": -0.20478960871696472, "rewards/margins": 0.06655945628881454, "rewards/rejected": -0.27134907245635986, "step": 1098 }, { "epoch": 3.0088980150581794, "grad_norm": 2.691727638244629, "learning_rate": 8.494520547945205e-07, "log_odds_chosen": 1.2003743648529053, "log_odds_ratio": -0.5705018043518066, "logits/chosen": 0.36500513553619385, "logits/rejected": 0.28246161341667175, "logps/chosen": -2.431340456008911, "logps/rejected": -3.5628578662872314, "loss": 0.9099, "nll_loss": 0.8528412580490112, "rewards/accuracies": 0.75, "rewards/chosen": -0.2431340515613556, "rewards/margins": 0.11315170675516129, "rewards/rejected": -0.3562857508659363, "step": 1099 }, { "epoch": 3.0116358658453115, "grad_norm": 2.0563747882843018, "learning_rate": 8.493150684931506e-07, "log_odds_chosen": 0.4889274835586548, "log_odds_ratio": -0.500011146068573, "logits/chosen": 0.4324374794960022, "logits/rejected": 0.2828126549720764, "logps/chosen": -1.9680527448654175, "logps/rejected": -2.3967275619506836, "loss": 0.9793, "nll_loss": 0.9293131232261658, "rewards/accuracies": 0.875, "rewards/chosen": -0.19680528342723846, "rewards/margins": 0.04286748170852661, "rewards/rejected": -0.23967275023460388, "step": 1100 }, { "epoch": 3.0143737166324436, "grad_norm": 1.8957245349884033, "learning_rate": 8.491780821917808e-07, "log_odds_chosen": 1.751338005065918, "log_odds_ratio": -0.23294907808303833, "logits/chosen": 0.4385201334953308, "logits/rejected": 0.33049988746643066, "logps/chosen": -2.0880675315856934, "logps/rejected": -3.7166380882263184, "loss": 0.9046, "nll_loss": 0.8812742233276367, "rewards/accuracies": 1.0, "rewards/chosen": -0.20880675315856934, "rewards/margins": 0.1628570556640625, "rewards/rejected": -0.37166380882263184, "step": 1101 }, { "epoch": 3.0171115674195756, "grad_norm": 2.206420660018921, "learning_rate": 8.490410958904109e-07, "log_odds_chosen": 0.4148520827293396, "log_odds_ratio": -0.5590669512748718, "logits/chosen": 0.6016066670417786, "logits/rejected": 0.5641297698020935, "logps/chosen": -1.7732511758804321, "logps/rejected": -2.1274585723876953, "loss": 0.9185, "nll_loss": 0.8626154661178589, "rewards/accuracies": 0.75, "rewards/chosen": -0.17732511460781097, "rewards/margins": 0.0354207344353199, "rewards/rejected": -0.21274584531784058, "step": 1102 }, { "epoch": 3.0198494182067077, "grad_norm": 2.238025426864624, "learning_rate": 8.48904109589041e-07, "log_odds_chosen": 0.5505889654159546, "log_odds_ratio": -0.5231579542160034, "logits/chosen": 0.5722850561141968, "logits/rejected": 0.5209581255912781, "logps/chosen": -1.9766887426376343, "logps/rejected": -2.4573183059692383, "loss": 0.8755, "nll_loss": 0.8231510519981384, "rewards/accuracies": 0.625, "rewards/chosen": -0.1976688802242279, "rewards/margins": 0.04806295782327652, "rewards/rejected": -0.24573184549808502, "step": 1103 }, { "epoch": 3.02258726899384, "grad_norm": 3.2236766815185547, "learning_rate": 8.487671232876712e-07, "log_odds_chosen": 0.769812822341919, "log_odds_ratio": -0.7871295213699341, "logits/chosen": 0.7237274050712585, "logits/rejected": 0.727869987487793, "logps/chosen": -2.6657893657684326, "logps/rejected": -3.364189624786377, "loss": 1.064, "nll_loss": 0.9853276014328003, "rewards/accuracies": 0.5, "rewards/chosen": -0.26657894253730774, "rewards/margins": 0.06984003633260727, "rewards/rejected": -0.3364189863204956, "step": 1104 }, { "epoch": 3.025325119780972, "grad_norm": 1.9424753189086914, "learning_rate": 8.486301369863013e-07, "log_odds_chosen": 1.3078396320343018, "log_odds_ratio": -0.3570164442062378, "logits/chosen": 0.40942028164863586, "logits/rejected": 0.2660563588142395, "logps/chosen": -1.7161449193954468, "logps/rejected": -2.8880889415740967, "loss": 0.9333, "nll_loss": 0.8976337909698486, "rewards/accuracies": 0.875, "rewards/chosen": -0.17161449790000916, "rewards/margins": 0.11719441413879395, "rewards/rejected": -0.2888088822364807, "step": 1105 }, { "epoch": 3.028062970568104, "grad_norm": 2.499863624572754, "learning_rate": 8.484931506849314e-07, "log_odds_chosen": 0.9816054105758667, "log_odds_ratio": -0.4492323398590088, "logits/chosen": 0.864632785320282, "logits/rejected": 0.888733983039856, "logps/chosen": -3.105703830718994, "logps/rejected": -4.044816017150879, "loss": 0.9318, "nll_loss": 0.8868350386619568, "rewards/accuracies": 0.75, "rewards/chosen": -0.3105703890323639, "rewards/margins": 0.09391121566295624, "rewards/rejected": -0.40448158979415894, "step": 1106 }, { "epoch": 3.030800821355236, "grad_norm": 2.5012452602386475, "learning_rate": 8.483561643835616e-07, "log_odds_chosen": 0.5306471586227417, "log_odds_ratio": -0.6524181365966797, "logits/chosen": 0.8197237253189087, "logits/rejected": 0.7927792072296143, "logps/chosen": -2.499685287475586, "logps/rejected": -2.948401689529419, "loss": 0.8613, "nll_loss": 0.7961045503616333, "rewards/accuracies": 0.875, "rewards/chosen": -0.2499685287475586, "rewards/margins": 0.04487163946032524, "rewards/rejected": -0.2948401868343353, "step": 1107 }, { "epoch": 3.033538672142368, "grad_norm": 2.1199569702148438, "learning_rate": 8.482191780821918e-07, "log_odds_chosen": 0.8242374658584595, "log_odds_ratio": -0.3994162678718567, "logits/chosen": 0.6816278696060181, "logits/rejected": 0.5972529053688049, "logps/chosen": -1.6718881130218506, "logps/rejected": -2.372241973876953, "loss": 0.9202, "nll_loss": 0.880256712436676, "rewards/accuracies": 1.0, "rewards/chosen": -0.16718879342079163, "rewards/margins": 0.07003539055585861, "rewards/rejected": -0.23722419142723083, "step": 1108 }, { "epoch": 3.0362765229295006, "grad_norm": 2.2213268280029297, "learning_rate": 8.480821917808218e-07, "log_odds_chosen": 1.1349143981933594, "log_odds_ratio": -0.4287078380584717, "logits/chosen": 0.8912521004676819, "logits/rejected": 0.9236114025115967, "logps/chosen": -1.8906407356262207, "logps/rejected": -2.944417715072632, "loss": 0.754, "nll_loss": 0.7111613750457764, "rewards/accuracies": 0.75, "rewards/chosen": -0.18906408548355103, "rewards/margins": 0.1053776741027832, "rewards/rejected": -0.2944417893886566, "step": 1109 }, { "epoch": 3.0390143737166326, "grad_norm": 2.4142496585845947, "learning_rate": 8.47945205479452e-07, "log_odds_chosen": 0.8155400156974792, "log_odds_ratio": -0.551111102104187, "logits/chosen": 0.6694037318229675, "logits/rejected": 0.6356533169746399, "logps/chosen": -2.343021869659424, "logps/rejected": -3.094073534011841, "loss": 0.9408, "nll_loss": 0.8856850862503052, "rewards/accuracies": 0.75, "rewards/chosen": -0.23430222272872925, "rewards/margins": 0.07510515302419662, "rewards/rejected": -0.3094073534011841, "step": 1110 }, { "epoch": 3.0417522245037647, "grad_norm": 5.770945072174072, "learning_rate": 8.478082191780822e-07, "log_odds_chosen": -0.21311143040657043, "log_odds_ratio": -0.8965083360671997, "logits/chosen": 0.8730108737945557, "logits/rejected": 0.9099056720733643, "logps/chosen": -2.876617431640625, "logps/rejected": -2.6494226455688477, "loss": 0.9597, "nll_loss": 0.8700212240219116, "rewards/accuracies": 0.5, "rewards/chosen": -0.28766173124313354, "rewards/margins": -0.022719498723745346, "rewards/rejected": -0.2649422585964203, "step": 1111 }, { "epoch": 3.0444900752908968, "grad_norm": 2.358003854751587, "learning_rate": 8.476712328767122e-07, "log_odds_chosen": 0.6561554670333862, "log_odds_ratio": -0.5331440567970276, "logits/chosen": 0.6868943572044373, "logits/rejected": 0.6673057675361633, "logps/chosen": -1.8945724964141846, "logps/rejected": -2.4104225635528564, "loss": 0.8944, "nll_loss": 0.8411004543304443, "rewards/accuracies": 0.625, "rewards/chosen": -0.1894572675228119, "rewards/margins": 0.05158502608537674, "rewards/rejected": -0.24104227125644684, "step": 1112 }, { "epoch": 3.047227926078029, "grad_norm": 2.2415215969085693, "learning_rate": 8.475342465753424e-07, "log_odds_chosen": 0.7331098318099976, "log_odds_ratio": -0.4184131622314453, "logits/chosen": 0.47694480419158936, "logits/rejected": 0.3745982050895691, "logps/chosen": -1.641777753829956, "logps/rejected": -2.276236057281494, "loss": 0.9092, "nll_loss": 0.8673272728919983, "rewards/accuracies": 1.0, "rewards/chosen": -0.1641777753829956, "rewards/margins": 0.0634458065032959, "rewards/rejected": -0.2276235818862915, "step": 1113 }, { "epoch": 3.049965776865161, "grad_norm": 2.325895071029663, "learning_rate": 8.473972602739725e-07, "log_odds_chosen": 0.7042010426521301, "log_odds_ratio": -0.4543130397796631, "logits/chosen": 0.5806267857551575, "logits/rejected": 0.5612154006958008, "logps/chosen": -2.1765058040618896, "logps/rejected": -2.8418636322021484, "loss": 0.8983, "nll_loss": 0.8528890013694763, "rewards/accuracies": 0.875, "rewards/chosen": -0.2176506072282791, "rewards/margins": 0.06653578579425812, "rewards/rejected": -0.28418636322021484, "step": 1114 }, { "epoch": 3.052703627652293, "grad_norm": 2.1461739540100098, "learning_rate": 8.472602739726027e-07, "log_odds_chosen": 1.1415491104125977, "log_odds_ratio": -0.4592435657978058, "logits/chosen": 0.4969262480735779, "logits/rejected": 0.4113892614841461, "logps/chosen": -2.0024948120117188, "logps/rejected": -3.0583763122558594, "loss": 0.9367, "nll_loss": 0.8908208012580872, "rewards/accuracies": 0.625, "rewards/chosen": -0.20024946331977844, "rewards/margins": 0.10558813810348511, "rewards/rejected": -0.30583763122558594, "step": 1115 }, { "epoch": 3.055441478439425, "grad_norm": 2.3425581455230713, "learning_rate": 8.471232876712328e-07, "log_odds_chosen": 0.5998102426528931, "log_odds_ratio": -0.6733768582344055, "logits/chosen": 0.5734869837760925, "logits/rejected": 0.5452003479003906, "logps/chosen": -2.1488876342773438, "logps/rejected": -2.6601409912109375, "loss": 0.8772, "nll_loss": 0.8098466396331787, "rewards/accuracies": 0.875, "rewards/chosen": -0.21488875150680542, "rewards/margins": 0.05112535506486893, "rewards/rejected": -0.26601409912109375, "step": 1116 }, { "epoch": 3.058179329226557, "grad_norm": 2.918379306793213, "learning_rate": 8.469863013698629e-07, "log_odds_chosen": 0.8908212780952454, "log_odds_ratio": -0.40221887826919556, "logits/chosen": 1.1280295848846436, "logits/rejected": 1.154164433479309, "logps/chosen": -3.091243267059326, "logps/rejected": -3.9383485317230225, "loss": 0.7876, "nll_loss": 0.747418999671936, "rewards/accuracies": 1.0, "rewards/chosen": -0.30912432074546814, "rewards/margins": 0.08471055328845978, "rewards/rejected": -0.3938348889350891, "step": 1117 }, { "epoch": 3.060917180013689, "grad_norm": 2.427968978881836, "learning_rate": 8.468493150684931e-07, "log_odds_chosen": 0.8959257006645203, "log_odds_ratio": -0.4113832116127014, "logits/chosen": 0.35055306553840637, "logits/rejected": 0.26601284742355347, "logps/chosen": -1.5867815017700195, "logps/rejected": -2.3641045093536377, "loss": 0.9528, "nll_loss": 0.9116508960723877, "rewards/accuracies": 0.875, "rewards/chosen": -0.15867814421653748, "rewards/margins": 0.07773231714963913, "rewards/rejected": -0.236410453915596, "step": 1118 }, { "epoch": 3.0636550308008212, "grad_norm": 2.553760051727295, "learning_rate": 8.467123287671232e-07, "log_odds_chosen": 0.34403547644615173, "log_odds_ratio": -0.5716643929481506, "logits/chosen": 0.5858017802238464, "logits/rejected": 0.46605080366134644, "logps/chosen": -1.5465960502624512, "logps/rejected": -1.835071325302124, "loss": 1.0093, "nll_loss": 0.9521438479423523, "rewards/accuracies": 0.75, "rewards/chosen": -0.15465959906578064, "rewards/margins": 0.02884751744568348, "rewards/rejected": -0.18350712954998016, "step": 1119 }, { "epoch": 3.0663928815879533, "grad_norm": 3.040395736694336, "learning_rate": 8.465753424657533e-07, "log_odds_chosen": 0.5441417694091797, "log_odds_ratio": -0.6221681833267212, "logits/chosen": 0.9289951324462891, "logits/rejected": 0.973838210105896, "logps/chosen": -2.86741042137146, "logps/rejected": -3.3655872344970703, "loss": 0.8201, "nll_loss": 0.7578819394111633, "rewards/accuracies": 0.75, "rewards/chosen": -0.28674107789993286, "rewards/margins": 0.049817681312561035, "rewards/rejected": -0.3365587294101715, "step": 1120 }, { "epoch": 3.0691307323750854, "grad_norm": 2.2996087074279785, "learning_rate": 8.464383561643835e-07, "log_odds_chosen": 0.577833890914917, "log_odds_ratio": -0.5110927224159241, "logits/chosen": 0.644740104675293, "logits/rejected": 0.6156343221664429, "logps/chosen": -1.9587905406951904, "logps/rejected": -2.4436817169189453, "loss": 0.8563, "nll_loss": 0.8051947355270386, "rewards/accuracies": 0.75, "rewards/chosen": -0.19587907195091248, "rewards/margins": 0.04848910868167877, "rewards/rejected": -0.24436816573143005, "step": 1121 }, { "epoch": 3.0718685831622174, "grad_norm": 2.499481439590454, "learning_rate": 8.463013698630137e-07, "log_odds_chosen": 0.5463569164276123, "log_odds_ratio": -0.6314443945884705, "logits/chosen": 0.6468022465705872, "logits/rejected": 0.6809415817260742, "logps/chosen": -2.2930736541748047, "logps/rejected": -2.7460780143737793, "loss": 0.9245, "nll_loss": 0.8613474369049072, "rewards/accuracies": 0.625, "rewards/chosen": -0.2293073832988739, "rewards/margins": 0.0453004315495491, "rewards/rejected": -0.2746078073978424, "step": 1122 }, { "epoch": 3.07460643394935, "grad_norm": 2.1305015087127686, "learning_rate": 8.461643835616437e-07, "log_odds_chosen": 0.8602712750434875, "log_odds_ratio": -0.4390547275543213, "logits/chosen": 0.5248732566833496, "logits/rejected": 0.4780541658401489, "logps/chosen": -1.9071943759918213, "logps/rejected": -2.665004253387451, "loss": 0.9451, "nll_loss": 0.9012091159820557, "rewards/accuracies": 0.625, "rewards/chosen": -0.19071942567825317, "rewards/margins": 0.07578098773956299, "rewards/rejected": -0.26650041341781616, "step": 1123 }, { "epoch": 3.077344284736482, "grad_norm": 2.179142951965332, "learning_rate": 8.46027397260274e-07, "log_odds_chosen": 1.1608657836914062, "log_odds_ratio": -0.4094092845916748, "logits/chosen": 0.6597608923912048, "logits/rejected": 0.6956533193588257, "logps/chosen": -1.7680847644805908, "logps/rejected": -2.767256259918213, "loss": 0.8641, "nll_loss": 0.8231524229049683, "rewards/accuracies": 0.875, "rewards/chosen": -0.17680847644805908, "rewards/margins": 0.09991714358329773, "rewards/rejected": -0.2767256200313568, "step": 1124 }, { "epoch": 3.080082135523614, "grad_norm": 2.0348520278930664, "learning_rate": 8.458904109589042e-07, "log_odds_chosen": 1.3980894088745117, "log_odds_ratio": -0.36656850576400757, "logits/chosen": 0.6968967318534851, "logits/rejected": 0.6528590321540833, "logps/chosen": -1.9433444738388062, "logps/rejected": -3.264035940170288, "loss": 0.9118, "nll_loss": 0.8751233220100403, "rewards/accuracies": 1.0, "rewards/chosen": -0.19433444738388062, "rewards/margins": 0.1320691555738449, "rewards/rejected": -0.3264036178588867, "step": 1125 }, { "epoch": 3.082819986310746, "grad_norm": 2.265481948852539, "learning_rate": 8.457534246575341e-07, "log_odds_chosen": 0.4350633919239044, "log_odds_ratio": -0.5268228650093079, "logits/chosen": 0.4962558150291443, "logits/rejected": 0.49633461236953735, "logps/chosen": -1.7207326889038086, "logps/rejected": -2.08272647857666, "loss": 0.8235, "nll_loss": 0.7708466649055481, "rewards/accuracies": 0.875, "rewards/chosen": -0.17207327485084534, "rewards/margins": 0.036199383437633514, "rewards/rejected": -0.20827266573905945, "step": 1126 }, { "epoch": 3.0855578370978782, "grad_norm": 2.905858278274536, "learning_rate": 8.456164383561644e-07, "log_odds_chosen": 0.6629433631896973, "log_odds_ratio": -0.4876835346221924, "logits/chosen": 0.7713944911956787, "logits/rejected": 0.7844208478927612, "logps/chosen": -2.179042339324951, "logps/rejected": -2.788802146911621, "loss": 0.9163, "nll_loss": 0.8675808906555176, "rewards/accuracies": 0.75, "rewards/chosen": -0.2179042249917984, "rewards/margins": 0.06097601354122162, "rewards/rejected": -0.27888023853302, "step": 1127 }, { "epoch": 3.0882956878850103, "grad_norm": 2.600865602493286, "learning_rate": 8.454794520547945e-07, "log_odds_chosen": 0.528377115726471, "log_odds_ratio": -0.7431538105010986, "logits/chosen": 0.815869927406311, "logits/rejected": 0.8208298683166504, "logps/chosen": -2.3314356803894043, "logps/rejected": -2.816324472427368, "loss": 0.996, "nll_loss": 0.921681821346283, "rewards/accuracies": 0.625, "rewards/chosen": -0.23314353823661804, "rewards/margins": 0.04848889261484146, "rewards/rejected": -0.2816324532032013, "step": 1128 }, { "epoch": 3.0910335386721424, "grad_norm": 4.274008750915527, "learning_rate": 8.453424657534247e-07, "log_odds_chosen": 0.5205428004264832, "log_odds_ratio": -0.697256863117218, "logits/chosen": 0.5619974732398987, "logits/rejected": 0.5207825899124146, "logps/chosen": -2.316636562347412, "logps/rejected": -2.721365451812744, "loss": 0.9993, "nll_loss": 0.9295339584350586, "rewards/accuracies": 0.625, "rewards/chosen": -0.23166367411613464, "rewards/margins": 0.04047289490699768, "rewards/rejected": -0.2721365690231323, "step": 1129 }, { "epoch": 3.0937713894592744, "grad_norm": 3.074295997619629, "learning_rate": 8.452054794520548e-07, "log_odds_chosen": 0.7973880171775818, "log_odds_ratio": -0.5164871215820312, "logits/chosen": 0.8251532316207886, "logits/rejected": 0.8361527323722839, "logps/chosen": -2.363443374633789, "logps/rejected": -3.066992998123169, "loss": 0.9026, "nll_loss": 0.85090571641922, "rewards/accuracies": 0.875, "rewards/chosen": -0.2363443374633789, "rewards/margins": 0.07035496830940247, "rewards/rejected": -0.30669930577278137, "step": 1130 }, { "epoch": 3.0965092402464065, "grad_norm": 2.4372637271881104, "learning_rate": 8.450684931506849e-07, "log_odds_chosen": 0.9228915572166443, "log_odds_ratio": -0.4374368488788605, "logits/chosen": 0.856709361076355, "logits/rejected": 0.8262520432472229, "logps/chosen": -3.1276440620422363, "logps/rejected": -3.953808307647705, "loss": 0.911, "nll_loss": 0.8672906160354614, "rewards/accuracies": 0.75, "rewards/chosen": -0.31276440620422363, "rewards/margins": 0.08261643350124359, "rewards/rejected": -0.3953808546066284, "step": 1131 }, { "epoch": 3.0992470910335386, "grad_norm": 2.6628057956695557, "learning_rate": 8.449315068493151e-07, "log_odds_chosen": 1.0313239097595215, "log_odds_ratio": -0.5034894347190857, "logits/chosen": 0.7932511568069458, "logits/rejected": 0.7669814825057983, "logps/chosen": -2.1238996982574463, "logps/rejected": -3.031298875808716, "loss": 0.9447, "nll_loss": 0.8943189382553101, "rewards/accuracies": 0.875, "rewards/chosen": -0.2123899757862091, "rewards/margins": 0.09073987603187561, "rewards/rejected": -0.3031298518180847, "step": 1132 }, { "epoch": 3.1019849418206706, "grad_norm": 3.1804866790771484, "learning_rate": 8.447945205479452e-07, "log_odds_chosen": 0.6825677156448364, "log_odds_ratio": -0.7501968741416931, "logits/chosen": 0.5463453531265259, "logits/rejected": 0.5040287971496582, "logps/chosen": -2.4462223052978516, "logps/rejected": -3.0543951988220215, "loss": 1.0007, "nll_loss": 0.9256991744041443, "rewards/accuracies": 0.75, "rewards/chosen": -0.24462223052978516, "rewards/margins": 0.06081729382276535, "rewards/rejected": -0.3054395318031311, "step": 1133 }, { "epoch": 3.1047227926078027, "grad_norm": 2.511378049850464, "learning_rate": 8.446575342465753e-07, "log_odds_chosen": 0.48506465554237366, "log_odds_ratio": -0.602035403251648, "logits/chosen": 0.620801568031311, "logits/rejected": 0.618018627166748, "logps/chosen": -2.165865898132324, "logps/rejected": -2.529655694961548, "loss": 0.856, "nll_loss": 0.7957990765571594, "rewards/accuracies": 0.875, "rewards/chosen": -0.21658660471439362, "rewards/margins": 0.03637896478176117, "rewards/rejected": -0.2529655694961548, "step": 1134 }, { "epoch": 3.1074606433949348, "grad_norm": 1.9031941890716553, "learning_rate": 8.445205479452055e-07, "log_odds_chosen": 1.7833466529846191, "log_odds_ratio": -0.3252369165420532, "logits/chosen": 0.5269911885261536, "logits/rejected": 0.4567175805568695, "logps/chosen": -1.952414631843567, "logps/rejected": -3.5954740047454834, "loss": 0.8489, "nll_loss": 0.8163537979125977, "rewards/accuracies": 0.875, "rewards/chosen": -0.19524146616458893, "rewards/margins": 0.1643059253692627, "rewards/rejected": -0.3595474064350128, "step": 1135 }, { "epoch": 3.1101984941820673, "grad_norm": 3.2925758361816406, "learning_rate": 8.443835616438357e-07, "log_odds_chosen": 0.588035523891449, "log_odds_ratio": -0.517276406288147, "logits/chosen": 0.7733167409896851, "logits/rejected": 0.7550026774406433, "logps/chosen": -2.5972249507904053, "logps/rejected": -3.1295523643493652, "loss": 0.8582, "nll_loss": 0.8064596652984619, "rewards/accuracies": 0.75, "rewards/chosen": -0.2597225308418274, "rewards/margins": 0.05323270335793495, "rewards/rejected": -0.31295520067214966, "step": 1136 }, { "epoch": 3.1129363449691994, "grad_norm": 2.5973613262176514, "learning_rate": 8.442465753424657e-07, "log_odds_chosen": 1.0038212537765503, "log_odds_ratio": -0.4978194832801819, "logits/chosen": 0.7900372743606567, "logits/rejected": 0.6917800903320312, "logps/chosen": -2.4830031394958496, "logps/rejected": -3.441154956817627, "loss": 0.8974, "nll_loss": 0.8475809097290039, "rewards/accuracies": 0.75, "rewards/chosen": -0.24830034375190735, "rewards/margins": 0.09581516683101654, "rewards/rejected": -0.3441154956817627, "step": 1137 }, { "epoch": 3.1156741957563314, "grad_norm": 2.372774362564087, "learning_rate": 8.441095890410959e-07, "log_odds_chosen": 0.8234714269638062, "log_odds_ratio": -0.4823454022407532, "logits/chosen": 0.5714786052703857, "logits/rejected": 0.5519014596939087, "logps/chosen": -1.945734977722168, "logps/rejected": -2.6794064044952393, "loss": 0.9604, "nll_loss": 0.9121173024177551, "rewards/accuracies": 0.75, "rewards/chosen": -0.19457349181175232, "rewards/margins": 0.07336714118719101, "rewards/rejected": -0.2679406404495239, "step": 1138 }, { "epoch": 3.1184120465434635, "grad_norm": 2.3176515102386475, "learning_rate": 8.439726027397261e-07, "log_odds_chosen": 0.4551413059234619, "log_odds_ratio": -0.5514631271362305, "logits/chosen": 0.6619856357574463, "logits/rejected": 0.6245942115783691, "logps/chosen": -2.0587353706359863, "logps/rejected": -2.4480509757995605, "loss": 0.9272, "nll_loss": 0.8720959424972534, "rewards/accuracies": 0.625, "rewards/chosen": -0.2058735489845276, "rewards/margins": 0.03893157094717026, "rewards/rejected": -0.24480511248111725, "step": 1139 }, { "epoch": 3.1211498973305956, "grad_norm": 4.789511203765869, "learning_rate": 8.438356164383561e-07, "log_odds_chosen": 0.49240750074386597, "log_odds_ratio": -0.9964036345481873, "logits/chosen": 0.6351169943809509, "logits/rejected": 0.5981416702270508, "logps/chosen": -3.8178954124450684, "logps/rejected": -4.239877223968506, "loss": 0.9968, "nll_loss": 0.89717036485672, "rewards/accuracies": 0.5, "rewards/chosen": -0.38178956508636475, "rewards/margins": 0.04219818487763405, "rewards/rejected": -0.4239877462387085, "step": 1140 }, { "epoch": 3.1238877481177276, "grad_norm": 2.590566873550415, "learning_rate": 8.436986301369863e-07, "log_odds_chosen": 0.7826565504074097, "log_odds_ratio": -0.4876556098461151, "logits/chosen": 0.5402875542640686, "logits/rejected": 0.44196704030036926, "logps/chosen": -2.1670589447021484, "logps/rejected": -2.844820022583008, "loss": 0.8935, "nll_loss": 0.8447224497795105, "rewards/accuracies": 0.75, "rewards/chosen": -0.21670588850975037, "rewards/margins": 0.06777611374855042, "rewards/rejected": -0.2844820022583008, "step": 1141 }, { "epoch": 3.1266255989048597, "grad_norm": 2.5044350624084473, "learning_rate": 8.435616438356165e-07, "log_odds_chosen": 1.5172264575958252, "log_odds_ratio": -0.6080124974250793, "logits/chosen": 0.7713210582733154, "logits/rejected": 0.8038954734802246, "logps/chosen": -3.170248031616211, "logps/rejected": -4.641049861907959, "loss": 0.9408, "nll_loss": 0.8800408840179443, "rewards/accuracies": 0.625, "rewards/chosen": -0.3170247972011566, "rewards/margins": 0.1470801681280136, "rewards/rejected": -0.4641050100326538, "step": 1142 }, { "epoch": 3.1293634496919918, "grad_norm": 2.475950002670288, "learning_rate": 8.434246575342465e-07, "log_odds_chosen": -0.05870106816291809, "log_odds_ratio": -0.7961830496788025, "logits/chosen": 0.6018511056900024, "logits/rejected": 0.6169069409370422, "logps/chosen": -2.3831262588500977, "logps/rejected": -2.273961305618286, "loss": 0.9312, "nll_loss": 0.8515481948852539, "rewards/accuracies": 0.5, "rewards/chosen": -0.23831260204315186, "rewards/margins": -0.010916461236774921, "rewards/rejected": -0.2273961305618286, "step": 1143 }, { "epoch": 3.132101300479124, "grad_norm": 2.151202440261841, "learning_rate": 8.432876712328767e-07, "log_odds_chosen": 1.4297454357147217, "log_odds_ratio": -0.29388490319252014, "logits/chosen": 0.6476706266403198, "logits/rejected": 0.5813559889793396, "logps/chosen": -2.1103310585021973, "logps/rejected": -3.3997507095336914, "loss": 0.8396, "nll_loss": 0.810179591178894, "rewards/accuracies": 0.875, "rewards/chosen": -0.21103313565254211, "rewards/margins": 0.12894195318222046, "rewards/rejected": -0.3399750590324402, "step": 1144 }, { "epoch": 3.134839151266256, "grad_norm": 2.565855026245117, "learning_rate": 8.431506849315068e-07, "log_odds_chosen": 0.4883183240890503, "log_odds_ratio": -0.5880789756774902, "logits/chosen": 0.544173002243042, "logits/rejected": 0.5349066853523254, "logps/chosen": -2.177400588989258, "logps/rejected": -2.640587329864502, "loss": 0.9146, "nll_loss": 0.8558065295219421, "rewards/accuracies": 0.625, "rewards/chosen": -0.21774007380008698, "rewards/margins": 0.0463186614215374, "rewards/rejected": -0.2640587091445923, "step": 1145 }, { "epoch": 3.137577002053388, "grad_norm": 2.192361831665039, "learning_rate": 8.43013698630137e-07, "log_odds_chosen": 1.208254337310791, "log_odds_ratio": -0.3312349319458008, "logits/chosen": 0.7813290357589722, "logits/rejected": 0.7747058868408203, "logps/chosen": -2.0911662578582764, "logps/rejected": -3.1992974281311035, "loss": 0.858, "nll_loss": 0.8248472213745117, "rewards/accuracies": 0.875, "rewards/chosen": -0.2091166377067566, "rewards/margins": 0.11081313341856003, "rewards/rejected": -0.3199297785758972, "step": 1146 }, { "epoch": 3.14031485284052, "grad_norm": 3.342888593673706, "learning_rate": 8.428767123287671e-07, "log_odds_chosen": 0.8813533782958984, "log_odds_ratio": -0.43476030230522156, "logits/chosen": 0.5510207414627075, "logits/rejected": 0.4014272093772888, "logps/chosen": -2.222463369369507, "logps/rejected": -2.994220018386841, "loss": 0.9389, "nll_loss": 0.8953869938850403, "rewards/accuracies": 0.875, "rewards/chosen": -0.22224634885787964, "rewards/margins": 0.07717566192150116, "rewards/rejected": -0.299422025680542, "step": 1147 }, { "epoch": 3.143052703627652, "grad_norm": 4.949929714202881, "learning_rate": 8.427397260273972e-07, "log_odds_chosen": 0.6378663182258606, "log_odds_ratio": -0.6750388145446777, "logits/chosen": 0.7629462480545044, "logits/rejected": 0.7445520162582397, "logps/chosen": -3.089371919631958, "logps/rejected": -3.6860733032226562, "loss": 0.8817, "nll_loss": 0.8141730427742004, "rewards/accuracies": 0.625, "rewards/chosen": -0.3089371919631958, "rewards/margins": 0.059670135378837585, "rewards/rejected": -0.3686073422431946, "step": 1148 }, { "epoch": 3.145790554414784, "grad_norm": 2.3216545581817627, "learning_rate": 8.426027397260274e-07, "log_odds_chosen": 0.6273318529129028, "log_odds_ratio": -0.5930166244506836, "logits/chosen": 0.7353126406669617, "logits/rejected": 0.7001859545707703, "logps/chosen": -1.9228448867797852, "logps/rejected": -2.4826064109802246, "loss": 0.9366, "nll_loss": 0.8773413300514221, "rewards/accuracies": 0.875, "rewards/chosen": -0.192284494638443, "rewards/margins": 0.05597613751888275, "rewards/rejected": -0.24826063215732574, "step": 1149 }, { "epoch": 3.1485284052019167, "grad_norm": 2.5254781246185303, "learning_rate": 8.424657534246576e-07, "log_odds_chosen": 0.27510032057762146, "log_odds_ratio": -0.9107722043991089, "logits/chosen": 0.6310594081878662, "logits/rejected": 0.6648975014686584, "logps/chosen": -2.12904953956604, "logps/rejected": -2.292264223098755, "loss": 0.8757, "nll_loss": 0.7845869660377502, "rewards/accuracies": 0.75, "rewards/chosen": -0.21290495991706848, "rewards/margins": 0.016321485862135887, "rewards/rejected": -0.22922644019126892, "step": 1150 }, { "epoch": 3.1512662559890487, "grad_norm": 2.4924399852752686, "learning_rate": 8.423287671232876e-07, "log_odds_chosen": 1.6886483430862427, "log_odds_ratio": -0.29846447706222534, "logits/chosen": 0.7197940945625305, "logits/rejected": 0.6992305517196655, "logps/chosen": -1.7989799976348877, "logps/rejected": -3.293926239013672, "loss": 0.7724, "nll_loss": 0.7425246238708496, "rewards/accuracies": 1.0, "rewards/chosen": -0.1798979938030243, "rewards/margins": 0.14949461817741394, "rewards/rejected": -0.32939261198043823, "step": 1151 }, { "epoch": 3.154004106776181, "grad_norm": 2.5357859134674072, "learning_rate": 8.421917808219178e-07, "log_odds_chosen": 1.2924857139587402, "log_odds_ratio": -0.4630342125892639, "logits/chosen": 0.6289095282554626, "logits/rejected": 0.5916961431503296, "logps/chosen": -2.4136881828308105, "logps/rejected": -3.588608741760254, "loss": 0.924, "nll_loss": 0.8776912689208984, "rewards/accuracies": 0.875, "rewards/chosen": -0.24136880040168762, "rewards/margins": 0.11749207228422165, "rewards/rejected": -0.3588608503341675, "step": 1152 }, { "epoch": 3.156741957563313, "grad_norm": 2.3461697101593018, "learning_rate": 8.42054794520548e-07, "log_odds_chosen": 1.9506473541259766, "log_odds_ratio": -0.23236460983753204, "logits/chosen": 0.47766584157943726, "logits/rejected": 0.3797813951969147, "logps/chosen": -2.308389186859131, "logps/rejected": -4.113356113433838, "loss": 0.9444, "nll_loss": 0.9211215376853943, "rewards/accuracies": 1.0, "rewards/chosen": -0.23083890974521637, "rewards/margins": 0.1804967075586319, "rewards/rejected": -0.41133561730384827, "step": 1153 }, { "epoch": 3.159479808350445, "grad_norm": 2.7164433002471924, "learning_rate": 8.41917808219178e-07, "log_odds_chosen": 1.2731133699417114, "log_odds_ratio": -0.6400773525238037, "logits/chosen": 0.43077245354652405, "logits/rejected": 0.3444143533706665, "logps/chosen": -2.5293397903442383, "logps/rejected": -3.6859896183013916, "loss": 0.9385, "nll_loss": 0.874474823474884, "rewards/accuracies": 0.75, "rewards/chosen": -0.2529340088367462, "rewards/margins": 0.11566494405269623, "rewards/rejected": -0.36859893798828125, "step": 1154 }, { "epoch": 3.162217659137577, "grad_norm": 2.6526360511779785, "learning_rate": 8.417808219178082e-07, "log_odds_chosen": 0.43693435192108154, "log_odds_ratio": -0.6745319366455078, "logits/chosen": 0.8027233481407166, "logits/rejected": 0.7992336750030518, "logps/chosen": -2.4886844158172607, "logps/rejected": -2.877412796020508, "loss": 0.8809, "nll_loss": 0.8134291172027588, "rewards/accuracies": 0.625, "rewards/chosen": -0.24886846542358398, "rewards/margins": 0.03887281194329262, "rewards/rejected": -0.2877412736415863, "step": 1155 }, { "epoch": 3.164955509924709, "grad_norm": 2.1294140815734863, "learning_rate": 8.416438356164384e-07, "log_odds_chosen": 1.6368703842163086, "log_odds_ratio": -0.3437512516975403, "logits/chosen": 0.6464563608169556, "logits/rejected": 0.6092350482940674, "logps/chosen": -1.9433722496032715, "logps/rejected": -3.503903388977051, "loss": 0.9331, "nll_loss": 0.898723304271698, "rewards/accuracies": 0.875, "rewards/chosen": -0.19433723390102386, "rewards/margins": 0.1560531109571457, "rewards/rejected": -0.35039031505584717, "step": 1156 }, { "epoch": 3.167693360711841, "grad_norm": 2.3277759552001953, "learning_rate": 8.415068493150684e-07, "log_odds_chosen": 0.7268433570861816, "log_odds_ratio": -0.571374237537384, "logits/chosen": 0.43490636348724365, "logits/rejected": 0.4128236174583435, "logps/chosen": -2.099928140640259, "logps/rejected": -2.724573850631714, "loss": 0.8871, "nll_loss": 0.8299975991249084, "rewards/accuracies": 0.75, "rewards/chosen": -0.20999284088611603, "rewards/margins": 0.06246455758810043, "rewards/rejected": -0.27245739102363586, "step": 1157 }, { "epoch": 3.1704312114989732, "grad_norm": 2.3196828365325928, "learning_rate": 8.413698630136986e-07, "log_odds_chosen": 1.12245774269104, "log_odds_ratio": -0.4282374083995819, "logits/chosen": 0.5473359823226929, "logits/rejected": 0.47395357489585876, "logps/chosen": -2.038738250732422, "logps/rejected": -3.077397108078003, "loss": 0.8637, "nll_loss": 0.8208590149879456, "rewards/accuracies": 0.75, "rewards/chosen": -0.20387384295463562, "rewards/margins": 0.10386586934328079, "rewards/rejected": -0.3077397346496582, "step": 1158 }, { "epoch": 3.1731690622861053, "grad_norm": 1.9637919664382935, "learning_rate": 8.412328767123287e-07, "log_odds_chosen": 1.4340083599090576, "log_odds_ratio": -0.45890167355537415, "logits/chosen": 0.5808746814727783, "logits/rejected": 0.5656935572624207, "logps/chosen": -2.060199737548828, "logps/rejected": -3.3507742881774902, "loss": 0.856, "nll_loss": 0.8101336359977722, "rewards/accuracies": 0.875, "rewards/chosen": -0.20601995289325714, "rewards/margins": 0.12905748188495636, "rewards/rejected": -0.3350774347782135, "step": 1159 }, { "epoch": 3.1759069130732374, "grad_norm": 2.815657138824463, "learning_rate": 8.410958904109589e-07, "log_odds_chosen": 0.471429705619812, "log_odds_ratio": -0.6672448515892029, "logits/chosen": 0.8973636031150818, "logits/rejected": 0.9506098031997681, "logps/chosen": -2.656221389770508, "logps/rejected": -3.0954740047454834, "loss": 0.8481, "nll_loss": 0.7813326716423035, "rewards/accuracies": 0.375, "rewards/chosen": -0.2656221389770508, "rewards/margins": 0.043925248086452484, "rewards/rejected": -0.30954739451408386, "step": 1160 }, { "epoch": 3.1786447638603694, "grad_norm": 2.241201162338257, "learning_rate": 8.40958904109589e-07, "log_odds_chosen": 1.4790189266204834, "log_odds_ratio": -0.32299306988716125, "logits/chosen": 0.4425795078277588, "logits/rejected": 0.4443106949329376, "logps/chosen": -1.9832841157913208, "logps/rejected": -3.3588218688964844, "loss": 0.877, "nll_loss": 0.8447062969207764, "rewards/accuracies": 0.875, "rewards/chosen": -0.19832843542099, "rewards/margins": 0.13755378127098083, "rewards/rejected": -0.33588218688964844, "step": 1161 }, { "epoch": 3.181382614647502, "grad_norm": 1.9891870021820068, "learning_rate": 8.408219178082191e-07, "log_odds_chosen": 0.6834841370582581, "log_odds_ratio": -0.6189597845077515, "logits/chosen": 0.8435438871383667, "logits/rejected": 0.829302966594696, "logps/chosen": -1.9851658344268799, "logps/rejected": -2.5188369750976562, "loss": 0.9741, "nll_loss": 0.9122386574745178, "rewards/accuracies": 0.875, "rewards/chosen": -0.1985166072845459, "rewards/margins": 0.05336710810661316, "rewards/rejected": -0.25188368558883667, "step": 1162 }, { "epoch": 3.184120465434634, "grad_norm": 2.535961151123047, "learning_rate": 8.406849315068493e-07, "log_odds_chosen": 0.7559865117073059, "log_odds_ratio": -0.4522102475166321, "logits/chosen": 0.5764405131340027, "logits/rejected": 0.45752909779548645, "logps/chosen": -2.1547420024871826, "logps/rejected": -2.7555065155029297, "loss": 0.9939, "nll_loss": 0.948712170124054, "rewards/accuracies": 0.875, "rewards/chosen": -0.2154742032289505, "rewards/margins": 0.06007646024227142, "rewards/rejected": -0.2755506634712219, "step": 1163 }, { "epoch": 3.186858316221766, "grad_norm": 4.9530510902404785, "learning_rate": 8.405479452054794e-07, "log_odds_chosen": 1.0525145530700684, "log_odds_ratio": -0.4126173257827759, "logits/chosen": 0.4581025540828705, "logits/rejected": 0.4034709334373474, "logps/chosen": -1.735811710357666, "logps/rejected": -2.7423434257507324, "loss": 0.8653, "nll_loss": 0.824027419090271, "rewards/accuracies": 0.875, "rewards/chosen": -0.17358115315437317, "rewards/margins": 0.10065320134162903, "rewards/rejected": -0.2742343544960022, "step": 1164 }, { "epoch": 3.189596167008898, "grad_norm": 2.351278066635132, "learning_rate": 8.404109589041095e-07, "log_odds_chosen": 0.44235658645629883, "log_odds_ratio": -0.5681401491165161, "logits/chosen": 0.6814529299736023, "logits/rejected": 0.6132737398147583, "logps/chosen": -1.516611099243164, "logps/rejected": -1.8510640859603882, "loss": 0.9635, "nll_loss": 0.9067333340644836, "rewards/accuracies": 0.75, "rewards/chosen": -0.15166111290454865, "rewards/margins": 0.03344529867172241, "rewards/rejected": -0.18510641157627106, "step": 1165 }, { "epoch": 3.19233401779603, "grad_norm": 3.090341806411743, "learning_rate": 8.402739726027397e-07, "log_odds_chosen": 0.6466891765594482, "log_odds_ratio": -0.7462977170944214, "logits/chosen": 0.8192429542541504, "logits/rejected": 0.89405357837677, "logps/chosen": -2.6716861724853516, "logps/rejected": -3.2173566818237305, "loss": 0.8325, "nll_loss": 0.7579103708267212, "rewards/accuracies": 0.75, "rewards/chosen": -0.2671686112880707, "rewards/margins": 0.054567065089941025, "rewards/rejected": -0.321735680103302, "step": 1166 }, { "epoch": 3.1950718685831623, "grad_norm": 2.3733043670654297, "learning_rate": 8.401369863013699e-07, "log_odds_chosen": 0.8499343395233154, "log_odds_ratio": -0.388248085975647, "logits/chosen": 0.579963743686676, "logits/rejected": 0.609418511390686, "logps/chosen": -1.7485604286193848, "logps/rejected": -2.488013505935669, "loss": 0.7759, "nll_loss": 0.7370457649230957, "rewards/accuracies": 1.0, "rewards/chosen": -0.174856036901474, "rewards/margins": 0.0739453136920929, "rewards/rejected": -0.2488013505935669, "step": 1167 }, { "epoch": 3.1978097193702943, "grad_norm": 2.519317388534546, "learning_rate": 8.399999999999999e-07, "log_odds_chosen": 0.5775246024131775, "log_odds_ratio": -0.48498451709747314, "logits/chosen": 0.7327845692634583, "logits/rejected": 0.6454626321792603, "logps/chosen": -2.3570122718811035, "logps/rejected": -2.859278440475464, "loss": 0.9319, "nll_loss": 0.8834433555603027, "rewards/accuracies": 0.875, "rewards/chosen": -0.23570123314857483, "rewards/margins": 0.05022662878036499, "rewards/rejected": -0.28592783212661743, "step": 1168 }, { "epoch": 3.2005475701574264, "grad_norm": 3.202822208404541, "learning_rate": 8.398630136986301e-07, "log_odds_chosen": 0.3492651581764221, "log_odds_ratio": -0.697021484375, "logits/chosen": 0.5716567039489746, "logits/rejected": 0.5345720648765564, "logps/chosen": -2.0237019062042236, "logps/rejected": -2.242100715637207, "loss": 0.9416, "nll_loss": 0.8718618750572205, "rewards/accuracies": 0.75, "rewards/chosen": -0.20237019658088684, "rewards/margins": 0.021839868277311325, "rewards/rejected": -0.22421006858348846, "step": 1169 }, { "epoch": 3.2032854209445585, "grad_norm": 2.0529868602752686, "learning_rate": 8.397260273972603e-07, "log_odds_chosen": 0.3931834101676941, "log_odds_ratio": -0.588959276676178, "logits/chosen": 0.6603078842163086, "logits/rejected": 0.666105329990387, "logps/chosen": -2.0329744815826416, "logps/rejected": -2.4053893089294434, "loss": 0.9315, "nll_loss": 0.872563362121582, "rewards/accuracies": 0.625, "rewards/chosen": -0.2032974511384964, "rewards/margins": 0.03724149987101555, "rewards/rejected": -0.24053892493247986, "step": 1170 }, { "epoch": 3.2060232717316905, "grad_norm": 2.6188340187072754, "learning_rate": 8.395890410958903e-07, "log_odds_chosen": 0.2660999894142151, "log_odds_ratio": -0.601902961730957, "logits/chosen": 0.44315630197525024, "logits/rejected": 0.32978734374046326, "logps/chosen": -2.102688789367676, "logps/rejected": -2.3232626914978027, "loss": 0.9425, "nll_loss": 0.8822956085205078, "rewards/accuracies": 0.5, "rewards/chosen": -0.21026889979839325, "rewards/margins": 0.022057393565773964, "rewards/rejected": -0.23232629895210266, "step": 1171 }, { "epoch": 3.2087611225188226, "grad_norm": 2.45259690284729, "learning_rate": 8.394520547945205e-07, "log_odds_chosen": -0.04762519896030426, "log_odds_ratio": -0.7705249786376953, "logits/chosen": 0.4522341191768646, "logits/rejected": 0.4094865322113037, "logps/chosen": -2.351156234741211, "logps/rejected": -2.28263783454895, "loss": 0.9845, "nll_loss": 0.9074360132217407, "rewards/accuracies": 0.625, "rewards/chosen": -0.2351156324148178, "rewards/margins": -0.0068518416956067085, "rewards/rejected": -0.22826378047466278, "step": 1172 }, { "epoch": 3.2114989733059547, "grad_norm": 1.9681422710418701, "learning_rate": 8.393150684931507e-07, "log_odds_chosen": 1.7781765460968018, "log_odds_ratio": -0.290393203496933, "logits/chosen": 0.6991653442382812, "logits/rejected": 0.6717350482940674, "logps/chosen": -2.052203416824341, "logps/rejected": -3.7308456897735596, "loss": 0.8259, "nll_loss": 0.7968108057975769, "rewards/accuracies": 1.0, "rewards/chosen": -0.20522035658359528, "rewards/margins": 0.16786423325538635, "rewards/rejected": -0.3730846047401428, "step": 1173 }, { "epoch": 3.2142368240930868, "grad_norm": 2.4594264030456543, "learning_rate": 8.391780821917808e-07, "log_odds_chosen": 0.7271844744682312, "log_odds_ratio": -0.4687492847442627, "logits/chosen": 0.4841075539588928, "logits/rejected": 0.42322051525115967, "logps/chosen": -2.078883171081543, "logps/rejected": -2.7531046867370605, "loss": 0.8868, "nll_loss": 0.8399479389190674, "rewards/accuracies": 0.625, "rewards/chosen": -0.20788832008838654, "rewards/margins": 0.06742215901613235, "rewards/rejected": -0.2753104567527771, "step": 1174 }, { "epoch": 3.216974674880219, "grad_norm": 2.2352044582366943, "learning_rate": 8.390410958904109e-07, "log_odds_chosen": 0.5090668201446533, "log_odds_ratio": -0.5306581854820251, "logits/chosen": 0.5775353312492371, "logits/rejected": 0.5546261668205261, "logps/chosen": -2.296992540359497, "logps/rejected": -2.728212356567383, "loss": 0.9235, "nll_loss": 0.8704015016555786, "rewards/accuracies": 0.875, "rewards/chosen": -0.2296992540359497, "rewards/margins": 0.04312199354171753, "rewards/rejected": -0.27282124757766724, "step": 1175 }, { "epoch": 3.2197125256673513, "grad_norm": 2.9478507041931152, "learning_rate": 8.38904109589041e-07, "log_odds_chosen": 0.7760655879974365, "log_odds_ratio": -0.5356851816177368, "logits/chosen": 0.7551781535148621, "logits/rejected": 0.7749913334846497, "logps/chosen": -2.2347702980041504, "logps/rejected": -2.877948522567749, "loss": 0.8092, "nll_loss": 0.7556113004684448, "rewards/accuracies": 0.625, "rewards/chosen": -0.2234770506620407, "rewards/margins": 0.06431781500577927, "rewards/rejected": -0.28779488801956177, "step": 1176 }, { "epoch": 3.2224503764544834, "grad_norm": 2.3741376399993896, "learning_rate": 8.387671232876712e-07, "log_odds_chosen": 1.4526915550231934, "log_odds_ratio": -0.24787980318069458, "logits/chosen": 1.0041333436965942, "logits/rejected": 1.0540547370910645, "logps/chosen": -1.9500541687011719, "logps/rejected": -3.276475667953491, "loss": 0.7337, "nll_loss": 0.7089021801948547, "rewards/accuracies": 1.0, "rewards/chosen": -0.19500543177127838, "rewards/margins": 0.13264213502407074, "rewards/rejected": -0.3276475667953491, "step": 1177 }, { "epoch": 3.2251882272416155, "grad_norm": 2.1089768409729004, "learning_rate": 8.386301369863013e-07, "log_odds_chosen": 1.7177221775054932, "log_odds_ratio": -0.28356069326400757, "logits/chosen": 0.716846227645874, "logits/rejected": 0.6863681077957153, "logps/chosen": -2.075265407562256, "logps/rejected": -3.660090208053589, "loss": 0.8782, "nll_loss": 0.8497967720031738, "rewards/accuracies": 1.0, "rewards/chosen": -0.2075265645980835, "rewards/margins": 0.15848247706890106, "rewards/rejected": -0.36600902676582336, "step": 1178 }, { "epoch": 3.2279260780287475, "grad_norm": 1.989437460899353, "learning_rate": 8.384931506849314e-07, "log_odds_chosen": 1.1000678539276123, "log_odds_ratio": -0.3428964614868164, "logits/chosen": 0.6217964887619019, "logits/rejected": 0.5968617796897888, "logps/chosen": -2.1717684268951416, "logps/rejected": -3.1807913780212402, "loss": 0.8572, "nll_loss": 0.822915256023407, "rewards/accuracies": 1.0, "rewards/chosen": -0.21717683970928192, "rewards/margins": 0.10090231895446777, "rewards/rejected": -0.3180791437625885, "step": 1179 }, { "epoch": 3.2306639288158796, "grad_norm": 2.253700017929077, "learning_rate": 8.383561643835616e-07, "log_odds_chosen": 0.887258768081665, "log_odds_ratio": -0.4119696617126465, "logits/chosen": 0.4825339913368225, "logits/rejected": 0.36019569635391235, "logps/chosen": -1.5275094509124756, "logps/rejected": -2.299597978591919, "loss": 0.8495, "nll_loss": 0.8082645535469055, "rewards/accuracies": 0.75, "rewards/chosen": -0.15275095403194427, "rewards/margins": 0.07720885425806046, "rewards/rejected": -0.22995978593826294, "step": 1180 }, { "epoch": 3.2334017796030117, "grad_norm": 2.2826106548309326, "learning_rate": 8.382191780821918e-07, "log_odds_chosen": 0.9468606114387512, "log_odds_ratio": -0.46509793400764465, "logits/chosen": 0.5078667998313904, "logits/rejected": 0.43508365750312805, "logps/chosen": -2.109130620956421, "logps/rejected": -2.9808077812194824, "loss": 0.8856, "nll_loss": 0.8390531539916992, "rewards/accuracies": 0.75, "rewards/chosen": -0.2109130620956421, "rewards/margins": 0.08716768026351929, "rewards/rejected": -0.2980807423591614, "step": 1181 }, { "epoch": 3.2361396303901437, "grad_norm": 2.8617217540740967, "learning_rate": 8.380821917808218e-07, "log_odds_chosen": 0.04346133768558502, "log_odds_ratio": -0.872042179107666, "logits/chosen": 0.6753572225570679, "logits/rejected": 0.684199869632721, "logps/chosen": -2.909125566482544, "logps/rejected": -2.9127068519592285, "loss": 0.8755, "nll_loss": 0.7883267998695374, "rewards/accuracies": 0.5, "rewards/chosen": -0.29091259837150574, "rewards/margins": 0.00035808607935905457, "rewards/rejected": -0.2912706732749939, "step": 1182 }, { "epoch": 3.238877481177276, "grad_norm": 2.662738084793091, "learning_rate": 8.37945205479452e-07, "log_odds_chosen": 0.7955805659294128, "log_odds_ratio": -0.504909098148346, "logits/chosen": 0.790489912033081, "logits/rejected": 0.7965452671051025, "logps/chosen": -2.5280420780181885, "logps/rejected": -3.2621023654937744, "loss": 0.9181, "nll_loss": 0.8676583170890808, "rewards/accuracies": 0.75, "rewards/chosen": -0.2528042197227478, "rewards/margins": 0.07340604066848755, "rewards/rejected": -0.32621026039123535, "step": 1183 }, { "epoch": 3.241615331964408, "grad_norm": 2.1950862407684326, "learning_rate": 8.378082191780822e-07, "log_odds_chosen": 0.45845842361450195, "log_odds_ratio": -0.5796648263931274, "logits/chosen": 0.6933336853981018, "logits/rejected": 0.6682873368263245, "logps/chosen": -1.821610927581787, "logps/rejected": -2.209329128265381, "loss": 0.8368, "nll_loss": 0.7788385152816772, "rewards/accuracies": 0.75, "rewards/chosen": -0.1821610927581787, "rewards/margins": 0.03877183049917221, "rewards/rejected": -0.22093293070793152, "step": 1184 }, { "epoch": 3.24435318275154, "grad_norm": 2.5462441444396973, "learning_rate": 8.376712328767122e-07, "log_odds_chosen": 0.4805133044719696, "log_odds_ratio": -0.7751232385635376, "logits/chosen": 0.6627820134162903, "logits/rejected": 0.6720244884490967, "logps/chosen": -2.0845322608947754, "logps/rejected": -2.6073834896087646, "loss": 0.9288, "nll_loss": 0.8513257503509521, "rewards/accuracies": 0.375, "rewards/chosen": -0.2084532380104065, "rewards/margins": 0.05228510871529579, "rewards/rejected": -0.260738343000412, "step": 1185 }, { "epoch": 3.247091033538672, "grad_norm": 2.4464237689971924, "learning_rate": 8.375342465753424e-07, "log_odds_chosen": 0.6275203227996826, "log_odds_ratio": -0.47373872995376587, "logits/chosen": 0.5414494276046753, "logits/rejected": 0.5249955058097839, "logps/chosen": -2.2201037406921387, "logps/rejected": -2.7670202255249023, "loss": 0.9899, "nll_loss": 0.9424793720245361, "rewards/accuracies": 0.875, "rewards/chosen": -0.22201038897037506, "rewards/margins": 0.05469166487455368, "rewards/rejected": -0.27670204639434814, "step": 1186 }, { "epoch": 3.249828884325804, "grad_norm": 2.986332416534424, "learning_rate": 8.373972602739726e-07, "log_odds_chosen": 0.09887464344501495, "log_odds_ratio": -0.7298468351364136, "logits/chosen": 0.8388526439666748, "logits/rejected": 0.8271574974060059, "logps/chosen": -2.853358745574951, "logps/rejected": -2.8956117630004883, "loss": 0.9202, "nll_loss": 0.8472521305084229, "rewards/accuracies": 0.5, "rewards/chosen": -0.285335898399353, "rewards/margins": 0.004225276410579681, "rewards/rejected": -0.2895611822605133, "step": 1187 }, { "epoch": 3.2525667351129366, "grad_norm": 2.2725846767425537, "learning_rate": 8.372602739726027e-07, "log_odds_chosen": 1.127862572669983, "log_odds_ratio": -0.3828829824924469, "logits/chosen": 0.7982226610183716, "logits/rejected": 0.8065658807754517, "logps/chosen": -2.290498733520508, "logps/rejected": -3.370319366455078, "loss": 0.806, "nll_loss": 0.7676622867584229, "rewards/accuracies": 0.875, "rewards/chosen": -0.22904986143112183, "rewards/margins": 0.1079820841550827, "rewards/rejected": -0.3370319604873657, "step": 1188 }, { "epoch": 3.2553045859000687, "grad_norm": 2.660022020339966, "learning_rate": 8.371232876712328e-07, "log_odds_chosen": 1.439695954322815, "log_odds_ratio": -0.49124184250831604, "logits/chosen": 1.0047929286956787, "logits/rejected": 1.0090349912643433, "logps/chosen": -2.259892463684082, "logps/rejected": -3.5057148933410645, "loss": 0.8469, "nll_loss": 0.7977747917175293, "rewards/accuracies": 0.75, "rewards/chosen": -0.22598926723003387, "rewards/margins": 0.12458223104476929, "rewards/rejected": -0.35057151317596436, "step": 1189 }, { "epoch": 3.2580424366872007, "grad_norm": 3.9609391689300537, "learning_rate": 8.369863013698629e-07, "log_odds_chosen": 0.32399195432662964, "log_odds_ratio": -0.893943190574646, "logits/chosen": 0.9376223683357239, "logits/rejected": 0.9537829756736755, "logps/chosen": -2.556802749633789, "logps/rejected": -2.908064603805542, "loss": 0.8516, "nll_loss": 0.7622311115264893, "rewards/accuracies": 0.625, "rewards/chosen": -0.25568029284477234, "rewards/margins": 0.03512618690729141, "rewards/rejected": -0.29080647230148315, "step": 1190 }, { "epoch": 3.260780287474333, "grad_norm": 2.542576551437378, "learning_rate": 8.368493150684931e-07, "log_odds_chosen": 0.5726680755615234, "log_odds_ratio": -0.4784156084060669, "logits/chosen": 0.5366111993789673, "logits/rejected": 0.40423059463500977, "logps/chosen": -1.7361483573913574, "logps/rejected": -2.2401626110076904, "loss": 0.9366, "nll_loss": 0.8888081312179565, "rewards/accuracies": 0.75, "rewards/chosen": -0.17361484467983246, "rewards/margins": 0.05040142312645912, "rewards/rejected": -0.22401626408100128, "step": 1191 }, { "epoch": 3.263518138261465, "grad_norm": 2.6793248653411865, "learning_rate": 8.367123287671232e-07, "log_odds_chosen": 0.33467185497283936, "log_odds_ratio": -0.7001742124557495, "logits/chosen": 0.6056684255599976, "logits/rejected": 0.6550842523574829, "logps/chosen": -2.565549373626709, "logps/rejected": -2.8519997596740723, "loss": 0.845, "nll_loss": 0.7749353051185608, "rewards/accuracies": 0.625, "rewards/chosen": -0.2565549612045288, "rewards/margins": 0.02864505723118782, "rewards/rejected": -0.28519999980926514, "step": 1192 }, { "epoch": 3.266255989048597, "grad_norm": 2.284343957901001, "learning_rate": 8.365753424657533e-07, "log_odds_chosen": 0.6779277920722961, "log_odds_ratio": -0.47466325759887695, "logits/chosen": 0.779874324798584, "logits/rejected": 0.8023687601089478, "logps/chosen": -2.1343207359313965, "logps/rejected": -2.736724376678467, "loss": 0.843, "nll_loss": 0.7955072522163391, "rewards/accuracies": 0.75, "rewards/chosen": -0.21343207359313965, "rewards/margins": 0.060240380465984344, "rewards/rejected": -0.2736724615097046, "step": 1193 }, { "epoch": 3.268993839835729, "grad_norm": 2.942607879638672, "learning_rate": 8.364383561643835e-07, "log_odds_chosen": 1.2144560813903809, "log_odds_ratio": -0.34860843420028687, "logits/chosen": 0.8033327460289001, "logits/rejected": 0.7943389415740967, "logps/chosen": -2.479083299636841, "logps/rejected": -3.5403733253479004, "loss": 0.8445, "nll_loss": 0.8096874952316284, "rewards/accuracies": 0.875, "rewards/chosen": -0.2479083389043808, "rewards/margins": 0.1061289831995964, "rewards/rejected": -0.3540373146533966, "step": 1194 }, { "epoch": 3.271731690622861, "grad_norm": 3.0116512775421143, "learning_rate": 8.363013698630137e-07, "log_odds_chosen": 1.1595149040222168, "log_odds_ratio": -0.5474570989608765, "logits/chosen": 0.9383790493011475, "logits/rejected": 0.9543541669845581, "logps/chosen": -2.65097713470459, "logps/rejected": -3.673438549041748, "loss": 0.8497, "nll_loss": 0.7949914932250977, "rewards/accuracies": 0.875, "rewards/chosen": -0.2650977075099945, "rewards/margins": 0.10224616527557373, "rewards/rejected": -0.36734384298324585, "step": 1195 }, { "epoch": 3.274469541409993, "grad_norm": 2.1007556915283203, "learning_rate": 8.361643835616437e-07, "log_odds_chosen": 1.3054473400115967, "log_odds_ratio": -0.3279843032360077, "logits/chosen": 0.6996710300445557, "logits/rejected": 0.6602658629417419, "logps/chosen": -1.5039807558059692, "logps/rejected": -2.6285018920898438, "loss": 0.8516, "nll_loss": 0.8188261985778809, "rewards/accuracies": 1.0, "rewards/chosen": -0.15039807558059692, "rewards/margins": 0.11245210468769073, "rewards/rejected": -0.26285019516944885, "step": 1196 }, { "epoch": 3.277207392197125, "grad_norm": 2.5662271976470947, "learning_rate": 8.360273972602739e-07, "log_odds_chosen": 0.9751328825950623, "log_odds_ratio": -0.402017742395401, "logits/chosen": 0.5764836072921753, "logits/rejected": 0.5060667395591736, "logps/chosen": -1.9683414697647095, "logps/rejected": -2.8311126232147217, "loss": 0.8799, "nll_loss": 0.8396850824356079, "rewards/accuracies": 0.875, "rewards/chosen": -0.19683414697647095, "rewards/margins": 0.08627711236476898, "rewards/rejected": -0.2831112742424011, "step": 1197 }, { "epoch": 3.2799452429842573, "grad_norm": 3.206145763397217, "learning_rate": 8.358904109589041e-07, "log_odds_chosen": 1.0465242862701416, "log_odds_ratio": -0.3942728340625763, "logits/chosen": 0.9474880695343018, "logits/rejected": 0.9876460433006287, "logps/chosen": -2.3676137924194336, "logps/rejected": -3.33427357673645, "loss": 1.0076, "nll_loss": 0.9681981801986694, "rewards/accuracies": 0.875, "rewards/chosen": -0.23676136136054993, "rewards/margins": 0.09666599333286285, "rewards/rejected": -0.333427369594574, "step": 1198 }, { "epoch": 3.2826830937713893, "grad_norm": 3.322519302368164, "learning_rate": 8.357534246575341e-07, "log_odds_chosen": 0.42823484539985657, "log_odds_ratio": -0.7991708517074585, "logits/chosen": 0.6428139209747314, "logits/rejected": 0.6632502675056458, "logps/chosen": -2.729611396789551, "logps/rejected": -3.090223789215088, "loss": 0.9224, "nll_loss": 0.8424330949783325, "rewards/accuracies": 0.625, "rewards/chosen": -0.2729611396789551, "rewards/margins": 0.036061231046915054, "rewards/rejected": -0.30902236700057983, "step": 1199 }, { "epoch": 3.2854209445585214, "grad_norm": 2.794717788696289, "learning_rate": 8.356164383561643e-07, "log_odds_chosen": 0.4199100732803345, "log_odds_ratio": -0.6018744111061096, "logits/chosen": 0.822404682636261, "logits/rejected": 0.7762359976768494, "logps/chosen": -2.2834553718566895, "logps/rejected": -2.62937331199646, "loss": 0.9602, "nll_loss": 0.9000214338302612, "rewards/accuracies": 0.75, "rewards/chosen": -0.22834554314613342, "rewards/margins": 0.03459177911281586, "rewards/rejected": -0.2629373073577881, "step": 1200 }, { "epoch": 3.2881587953456535, "grad_norm": 3.1577842235565186, "learning_rate": 8.354794520547945e-07, "log_odds_chosen": 0.31092628836631775, "log_odds_ratio": -0.7697759866714478, "logits/chosen": 0.4822220206260681, "logits/rejected": 0.4190710783004761, "logps/chosen": -2.344774007797241, "logps/rejected": -2.5963475704193115, "loss": 0.9731, "nll_loss": 0.8961367011070251, "rewards/accuracies": 0.75, "rewards/chosen": -0.23447740077972412, "rewards/margins": 0.025157351046800613, "rewards/rejected": -0.25963473320007324, "step": 1201 }, { "epoch": 3.2908966461327855, "grad_norm": 2.5554490089416504, "learning_rate": 8.353424657534246e-07, "log_odds_chosen": 0.8898066282272339, "log_odds_ratio": -0.3959372043609619, "logits/chosen": 0.5674747228622437, "logits/rejected": 0.5697360634803772, "logps/chosen": -1.727131962776184, "logps/rejected": -2.4507899284362793, "loss": 0.789, "nll_loss": 0.749435305595398, "rewards/accuracies": 1.0, "rewards/chosen": -0.17271319031715393, "rewards/margins": 0.07236580550670624, "rewards/rejected": -0.24507899582386017, "step": 1202 }, { "epoch": 3.293634496919918, "grad_norm": 2.0786962509155273, "learning_rate": 8.352054794520547e-07, "log_odds_chosen": 1.550458550453186, "log_odds_ratio": -0.27474445104599, "logits/chosen": 0.9020121097564697, "logits/rejected": 0.8936725854873657, "logps/chosen": -2.013847589492798, "logps/rejected": -3.46421480178833, "loss": 0.8064, "nll_loss": 0.7789399027824402, "rewards/accuracies": 1.0, "rewards/chosen": -0.2013847529888153, "rewards/margins": 0.1450367122888565, "rewards/rejected": -0.346421480178833, "step": 1203 }, { "epoch": 3.29637234770705, "grad_norm": 2.5507402420043945, "learning_rate": 8.350684931506848e-07, "log_odds_chosen": 0.5348920822143555, "log_odds_ratio": -0.5043765902519226, "logits/chosen": 0.9001491069793701, "logits/rejected": 0.8757636547088623, "logps/chosen": -2.1214611530303955, "logps/rejected": -2.552248477935791, "loss": 0.8395, "nll_loss": 0.789045512676239, "rewards/accuracies": 0.875, "rewards/chosen": -0.21214613318443298, "rewards/margins": 0.04307871311903, "rewards/rejected": -0.2552248239517212, "step": 1204 }, { "epoch": 3.299110198494182, "grad_norm": 2.3631725311279297, "learning_rate": 8.34931506849315e-07, "log_odds_chosen": 1.4108023643493652, "log_odds_ratio": -0.3327566981315613, "logits/chosen": 0.7487452030181885, "logits/rejected": 0.7257096767425537, "logps/chosen": -2.0372354984283447, "logps/rejected": -3.358468770980835, "loss": 0.8367, "nll_loss": 0.8034324645996094, "rewards/accuracies": 1.0, "rewards/chosen": -0.20372354984283447, "rewards/margins": 0.13212332129478455, "rewards/rejected": -0.335846871137619, "step": 1205 }, { "epoch": 3.3018480492813143, "grad_norm": 2.9124977588653564, "learning_rate": 8.347945205479451e-07, "log_odds_chosen": 0.7621986865997314, "log_odds_ratio": -0.6560429930686951, "logits/chosen": 0.8803917765617371, "logits/rejected": 0.894253671169281, "logps/chosen": -2.697657585144043, "logps/rejected": -3.4288947582244873, "loss": 0.7854, "nll_loss": 0.7197898626327515, "rewards/accuracies": 0.625, "rewards/chosen": -0.26976579427719116, "rewards/margins": 0.07312370091676712, "rewards/rejected": -0.3428894877433777, "step": 1206 }, { "epoch": 3.3045859000684463, "grad_norm": 2.543290615081787, "learning_rate": 8.346575342465752e-07, "log_odds_chosen": 0.1510475128889084, "log_odds_ratio": -0.6555065512657166, "logits/chosen": 0.596705973148346, "logits/rejected": 0.6401412487030029, "logps/chosen": -2.4565529823303223, "logps/rejected": -2.5883307456970215, "loss": 0.8865, "nll_loss": 0.8209720849990845, "rewards/accuracies": 0.5, "rewards/chosen": -0.24565529823303223, "rewards/margins": 0.013177789747714996, "rewards/rejected": -0.2588330805301666, "step": 1207 }, { "epoch": 3.3073237508555784, "grad_norm": 2.805525779724121, "learning_rate": 8.345205479452055e-07, "log_odds_chosen": 0.7733793258666992, "log_odds_ratio": -0.514906108379364, "logits/chosen": 0.5751959085464478, "logits/rejected": 0.588423490524292, "logps/chosen": -2.351079225540161, "logps/rejected": -3.041884660720825, "loss": 0.8953, "nll_loss": 0.8438138961791992, "rewards/accuracies": 0.75, "rewards/chosen": -0.2351079285144806, "rewards/margins": 0.06908053159713745, "rewards/rejected": -0.30418846011161804, "step": 1208 }, { "epoch": 3.3100616016427105, "grad_norm": 2.9093315601348877, "learning_rate": 8.343835616438357e-07, "log_odds_chosen": 0.6616608500480652, "log_odds_ratio": -0.8883269429206848, "logits/chosen": 0.8491115570068359, "logits/rejected": 0.8692641258239746, "logps/chosen": -3.055100440979004, "logps/rejected": -3.6742308139801025, "loss": 0.8639, "nll_loss": 0.7750334739685059, "rewards/accuracies": 0.75, "rewards/chosen": -0.3055100440979004, "rewards/margins": 0.06191302463412285, "rewards/rejected": -0.36742308735847473, "step": 1209 }, { "epoch": 3.3127994524298425, "grad_norm": 3.1899733543395996, "learning_rate": 8.342465753424657e-07, "log_odds_chosen": 0.7134077548980713, "log_odds_ratio": -0.4850142300128937, "logits/chosen": 0.6171062588691711, "logits/rejected": 0.5592008829116821, "logps/chosen": -2.062736749649048, "logps/rejected": -2.6516990661621094, "loss": 0.8721, "nll_loss": 0.823631227016449, "rewards/accuracies": 0.75, "rewards/chosen": -0.20627368986606598, "rewards/margins": 0.058896202594041824, "rewards/rejected": -0.2651699185371399, "step": 1210 }, { "epoch": 3.3155373032169746, "grad_norm": 2.593212842941284, "learning_rate": 8.341095890410959e-07, "log_odds_chosen": 0.4839237332344055, "log_odds_ratio": -0.5072489976882935, "logits/chosen": 0.7726486921310425, "logits/rejected": 0.7275124192237854, "logps/chosen": -1.8242785930633545, "logps/rejected": -2.2038965225219727, "loss": 0.9575, "nll_loss": 0.906816840171814, "rewards/accuracies": 0.875, "rewards/chosen": -0.18242785334587097, "rewards/margins": 0.03796180337667465, "rewards/rejected": -0.22038966417312622, "step": 1211 }, { "epoch": 3.3182751540041067, "grad_norm": 2.8508570194244385, "learning_rate": 8.339726027397261e-07, "log_odds_chosen": 0.8228578567504883, "log_odds_ratio": -0.42145803570747375, "logits/chosen": 0.9605545997619629, "logits/rejected": 0.9811601042747498, "logps/chosen": -2.274146795272827, "logps/rejected": -2.999136209487915, "loss": 0.7584, "nll_loss": 0.7162834405899048, "rewards/accuracies": 0.875, "rewards/chosen": -0.22741466760635376, "rewards/margins": 0.07249893248081207, "rewards/rejected": -0.2999136447906494, "step": 1212 }, { "epoch": 3.3210130047912387, "grad_norm": 2.3759922981262207, "learning_rate": 8.338356164383561e-07, "log_odds_chosen": 0.9246559143066406, "log_odds_ratio": -0.5325035452842712, "logits/chosen": 0.6598254442214966, "logits/rejected": 0.6075617074966431, "logps/chosen": -2.1772115230560303, "logps/rejected": -3.034385919570923, "loss": 0.8824, "nll_loss": 0.8291664719581604, "rewards/accuracies": 0.625, "rewards/chosen": -0.2177211493253708, "rewards/margins": 0.08571743965148926, "rewards/rejected": -0.30343860387802124, "step": 1213 }, { "epoch": 3.323750855578371, "grad_norm": 2.3247287273406982, "learning_rate": 8.336986301369863e-07, "log_odds_chosen": 0.7786041498184204, "log_odds_ratio": -0.46036723256111145, "logits/chosen": 0.9570003747940063, "logits/rejected": 1.001780390739441, "logps/chosen": -2.0217254161834717, "logps/rejected": -2.7200520038604736, "loss": 0.746, "nll_loss": 0.6999830007553101, "rewards/accuracies": 0.875, "rewards/chosen": -0.20217253267765045, "rewards/margins": 0.06983266770839691, "rewards/rejected": -0.27200520038604736, "step": 1214 }, { "epoch": 3.3264887063655033, "grad_norm": 1.9786789417266846, "learning_rate": 8.335616438356165e-07, "log_odds_chosen": 1.0994441509246826, "log_odds_ratio": -0.3664846420288086, "logits/chosen": 0.6923380494117737, "logits/rejected": 0.6253545880317688, "logps/chosen": -1.8124014139175415, "logps/rejected": -2.801753520965576, "loss": 0.9148, "nll_loss": 0.8781492710113525, "rewards/accuracies": 0.75, "rewards/chosen": -0.18124014139175415, "rewards/margins": 0.09893520921468735, "rewards/rejected": -0.2801753580570221, "step": 1215 }, { "epoch": 3.3292265571526354, "grad_norm": 3.726116418838501, "learning_rate": 8.334246575342466e-07, "log_odds_chosen": 0.2587721645832062, "log_odds_ratio": -0.7955515384674072, "logits/chosen": 0.7460461854934692, "logits/rejected": 0.7332026958465576, "logps/chosen": -3.2008843421936035, "logps/rejected": -3.39151668548584, "loss": 0.9465, "nll_loss": 0.866955041885376, "rewards/accuracies": 0.5, "rewards/chosen": -0.3200884461402893, "rewards/margins": 0.01906321384012699, "rewards/rejected": -0.33915165066719055, "step": 1216 }, { "epoch": 3.3319644079397674, "grad_norm": 2.542954683303833, "learning_rate": 8.332876712328767e-07, "log_odds_chosen": 0.47152987122535706, "log_odds_ratio": -0.6287305355072021, "logits/chosen": 0.5941840410232544, "logits/rejected": 0.6143350005149841, "logps/chosen": -2.2815728187561035, "logps/rejected": -2.7201364040374756, "loss": 0.9265, "nll_loss": 0.8636119961738586, "rewards/accuracies": 0.75, "rewards/chosen": -0.22815731167793274, "rewards/margins": 0.04385637491941452, "rewards/rejected": -0.27201366424560547, "step": 1217 }, { "epoch": 3.3347022587268995, "grad_norm": 2.348325729370117, "learning_rate": 8.331506849315069e-07, "log_odds_chosen": 2.120767831802368, "log_odds_ratio": -0.3870980739593506, "logits/chosen": 0.7107329368591309, "logits/rejected": 0.6238251328468323, "logps/chosen": -2.048450231552124, "logps/rejected": -3.950662612915039, "loss": 0.8012, "nll_loss": 0.7625350952148438, "rewards/accuracies": 0.75, "rewards/chosen": -0.20484501123428345, "rewards/margins": 0.19022125005722046, "rewards/rejected": -0.3950662612915039, "step": 1218 }, { "epoch": 3.3374401095140316, "grad_norm": 2.87440824508667, "learning_rate": 8.33013698630137e-07, "log_odds_chosen": 1.0098625421524048, "log_odds_ratio": -0.3546123504638672, "logits/chosen": 0.8861855268478394, "logits/rejected": 0.885624885559082, "logps/chosen": -2.387281656265259, "logps/rejected": -3.3223962783813477, "loss": 0.8623, "nll_loss": 0.8267897367477417, "rewards/accuracies": 1.0, "rewards/chosen": -0.23872815072536469, "rewards/margins": 0.09351148456335068, "rewards/rejected": -0.33223962783813477, "step": 1219 }, { "epoch": 3.3401779603011637, "grad_norm": 3.3146231174468994, "learning_rate": 8.328767123287671e-07, "log_odds_chosen": 0.3940516710281372, "log_odds_ratio": -1.0233736038208008, "logits/chosen": 0.829848051071167, "logits/rejected": 0.8810734748840332, "logps/chosen": -3.0042033195495605, "logps/rejected": -3.393815517425537, "loss": 0.885, "nll_loss": 0.7826734781265259, "rewards/accuracies": 0.5, "rewards/chosen": -0.3004203140735626, "rewards/margins": 0.03896123170852661, "rewards/rejected": -0.33938154578208923, "step": 1220 }, { "epoch": 3.3429158110882957, "grad_norm": 2.4731578826904297, "learning_rate": 8.327397260273972e-07, "log_odds_chosen": 0.9399216175079346, "log_odds_ratio": -0.3753328323364258, "logits/chosen": 1.095128059387207, "logits/rejected": 1.149040699005127, "logps/chosen": -2.6403017044067383, "logps/rejected": -3.5402121543884277, "loss": 0.7711, "nll_loss": 0.7335496544837952, "rewards/accuracies": 0.875, "rewards/chosen": -0.26403018832206726, "rewards/margins": 0.0899910107254982, "rewards/rejected": -0.35402122139930725, "step": 1221 }, { "epoch": 3.345653661875428, "grad_norm": 2.3163001537323, "learning_rate": 8.326027397260274e-07, "log_odds_chosen": 1.191306233406067, "log_odds_ratio": -0.37237367033958435, "logits/chosen": 0.4842531979084015, "logits/rejected": 0.47147369384765625, "logps/chosen": -1.9820046424865723, "logps/rejected": -3.0509095191955566, "loss": 0.9083, "nll_loss": 0.8710247874259949, "rewards/accuracies": 0.875, "rewards/chosen": -0.19820046424865723, "rewards/margins": 0.1068904772400856, "rewards/rejected": -0.30509093403816223, "step": 1222 }, { "epoch": 3.34839151266256, "grad_norm": 2.981706142425537, "learning_rate": 8.324657534246576e-07, "log_odds_chosen": 1.3413320779800415, "log_odds_ratio": -0.5332163572311401, "logits/chosen": 0.8319956064224243, "logits/rejected": 0.8621603846549988, "logps/chosen": -2.838371753692627, "logps/rejected": -4.095016002655029, "loss": 0.8213, "nll_loss": 0.7679872512817383, "rewards/accuracies": 0.75, "rewards/chosen": -0.2838371992111206, "rewards/margins": 0.12566441297531128, "rewards/rejected": -0.4095016419887543, "step": 1223 }, { "epoch": 3.351129363449692, "grad_norm": 2.5240280628204346, "learning_rate": 8.323287671232876e-07, "log_odds_chosen": 0.6818889379501343, "log_odds_ratio": -0.5278107523918152, "logits/chosen": 0.8537366390228271, "logits/rejected": 0.8414871692657471, "logps/chosen": -2.71335506439209, "logps/rejected": -3.312084436416626, "loss": 0.8325, "nll_loss": 0.7796935439109802, "rewards/accuracies": 0.875, "rewards/chosen": -0.27133551239967346, "rewards/margins": 0.05987294763326645, "rewards/rejected": -0.3312084674835205, "step": 1224 }, { "epoch": 3.353867214236824, "grad_norm": 2.1602697372436523, "learning_rate": 8.321917808219178e-07, "log_odds_chosen": 1.3853120803833008, "log_odds_ratio": -0.2454654574394226, "logits/chosen": 0.9981167316436768, "logits/rejected": 1.0134010314941406, "logps/chosen": -2.059699773788452, "logps/rejected": -3.3376646041870117, "loss": 0.7689, "nll_loss": 0.7444010972976685, "rewards/accuracies": 1.0, "rewards/chosen": -0.20596998929977417, "rewards/margins": 0.127796471118927, "rewards/rejected": -0.33376646041870117, "step": 1225 }, { "epoch": 3.356605065023956, "grad_norm": 2.8250532150268555, "learning_rate": 8.32054794520548e-07, "log_odds_chosen": 0.6516003608703613, "log_odds_ratio": -0.5251897573471069, "logits/chosen": 0.6436904072761536, "logits/rejected": 0.6188087463378906, "logps/chosen": -2.474392890930176, "logps/rejected": -3.0252442359924316, "loss": 0.9146, "nll_loss": 0.8621238470077515, "rewards/accuracies": 0.625, "rewards/chosen": -0.24743932485580444, "rewards/margins": 0.055085115134716034, "rewards/rejected": -0.3025244176387787, "step": 1226 }, { "epoch": 3.359342915811088, "grad_norm": 2.3417277336120605, "learning_rate": 8.31917808219178e-07, "log_odds_chosen": 1.3470818996429443, "log_odds_ratio": -0.4696052074432373, "logits/chosen": 0.6829222440719604, "logits/rejected": 0.6723858118057251, "logps/chosen": -2.3104262351989746, "logps/rejected": -3.579437255859375, "loss": 0.8609, "nll_loss": 0.8139723539352417, "rewards/accuracies": 0.75, "rewards/chosen": -0.23104263842105865, "rewards/margins": 0.12690109014511108, "rewards/rejected": -0.35794371366500854, "step": 1227 }, { "epoch": 3.36208076659822, "grad_norm": 4.704668045043945, "learning_rate": 8.317808219178082e-07, "log_odds_chosen": 0.3528404235839844, "log_odds_ratio": -0.6351265907287598, "logits/chosen": 0.7420560121536255, "logits/rejected": 0.7381861805915833, "logps/chosen": -2.67132568359375, "logps/rejected": -2.9842257499694824, "loss": 0.8877, "nll_loss": 0.8242244720458984, "rewards/accuracies": 0.625, "rewards/chosen": -0.26713258028030396, "rewards/margins": 0.03128998726606369, "rewards/rejected": -0.29842254519462585, "step": 1228 }, { "epoch": 3.3648186173853523, "grad_norm": 2.0700082778930664, "learning_rate": 8.316438356164384e-07, "log_odds_chosen": 1.4394664764404297, "log_odds_ratio": -0.3156837821006775, "logits/chosen": 0.7427552342414856, "logits/rejected": 0.7324544191360474, "logps/chosen": -1.530600666999817, "logps/rejected": -2.7694544792175293, "loss": 0.8234, "nll_loss": 0.7918000817298889, "rewards/accuracies": 1.0, "rewards/chosen": -0.15306007862091064, "rewards/margins": 0.12388534843921661, "rewards/rejected": -0.27694541215896606, "step": 1229 }, { "epoch": 3.3675564681724848, "grad_norm": 2.183354616165161, "learning_rate": 8.315068493150684e-07, "log_odds_chosen": 1.2468509674072266, "log_odds_ratio": -0.31107139587402344, "logits/chosen": 0.62375807762146, "logits/rejected": 0.5276333093643188, "logps/chosen": -1.7143754959106445, "logps/rejected": -2.8082313537597656, "loss": 0.908, "nll_loss": 0.8769158720970154, "rewards/accuracies": 1.0, "rewards/chosen": -0.17143754661083221, "rewards/margins": 0.10938558727502823, "rewards/rejected": -0.28082311153411865, "step": 1230 }, { "epoch": 3.370294318959617, "grad_norm": 2.492249011993408, "learning_rate": 8.313698630136986e-07, "log_odds_chosen": 1.146916389465332, "log_odds_ratio": -0.47074171900749207, "logits/chosen": 0.7929707765579224, "logits/rejected": 0.7792991995811462, "logps/chosen": -2.381434202194214, "logps/rejected": -3.414945363998413, "loss": 0.904, "nll_loss": 0.8568786978721619, "rewards/accuracies": 0.75, "rewards/chosen": -0.2381434142589569, "rewards/margins": 0.10335110127925873, "rewards/rejected": -0.34149450063705444, "step": 1231 }, { "epoch": 3.373032169746749, "grad_norm": 2.650848388671875, "learning_rate": 8.312328767123288e-07, "log_odds_chosen": 0.8865166306495667, "log_odds_ratio": -0.5049892663955688, "logits/chosen": 0.6473735570907593, "logits/rejected": 0.7276383638381958, "logps/chosen": -2.3803510665893555, "logps/rejected": -3.255674123764038, "loss": 0.8624, "nll_loss": 0.8118977546691895, "rewards/accuracies": 0.625, "rewards/chosen": -0.23803512752056122, "rewards/margins": 0.08753229677677155, "rewards/rejected": -0.32556742429733276, "step": 1232 }, { "epoch": 3.375770020533881, "grad_norm": 2.168408155441284, "learning_rate": 8.310958904109589e-07, "log_odds_chosen": 1.2530041933059692, "log_odds_ratio": -0.3484462797641754, "logits/chosen": 0.9409211874008179, "logits/rejected": 0.9248755574226379, "logps/chosen": -1.6801655292510986, "logps/rejected": -2.8030872344970703, "loss": 0.7701, "nll_loss": 0.7352192997932434, "rewards/accuracies": 0.75, "rewards/chosen": -0.16801655292510986, "rewards/margins": 0.11229216307401657, "rewards/rejected": -0.28030872344970703, "step": 1233 }, { "epoch": 3.378507871321013, "grad_norm": 2.300795555114746, "learning_rate": 8.30958904109589e-07, "log_odds_chosen": 0.5888192057609558, "log_odds_ratio": -0.49776923656463623, "logits/chosen": 0.7822348475456238, "logits/rejected": 0.7535536289215088, "logps/chosen": -1.995782732963562, "logps/rejected": -2.509417772293091, "loss": 0.8467, "nll_loss": 0.796911895275116, "rewards/accuracies": 0.75, "rewards/chosen": -0.19957828521728516, "rewards/margins": 0.051363494247198105, "rewards/rejected": -0.25094175338745117, "step": 1234 }, { "epoch": 3.381245722108145, "grad_norm": 2.1162335872650146, "learning_rate": 8.308219178082191e-07, "log_odds_chosen": 0.9688640832901001, "log_odds_ratio": -0.3657122254371643, "logits/chosen": 0.6996610164642334, "logits/rejected": 0.6690460443496704, "logps/chosen": -2.172696113586426, "logps/rejected": -3.0750017166137695, "loss": 0.873, "nll_loss": 0.83645099401474, "rewards/accuracies": 0.875, "rewards/chosen": -0.21726961433887482, "rewards/margins": 0.09023058414459229, "rewards/rejected": -0.3075001835823059, "step": 1235 }, { "epoch": 3.383983572895277, "grad_norm": 2.314788579940796, "learning_rate": 8.306849315068493e-07, "log_odds_chosen": 1.495283842086792, "log_odds_ratio": -0.3980933725833893, "logits/chosen": 0.8048758506774902, "logits/rejected": 0.7809277176856995, "logps/chosen": -2.1229798793792725, "logps/rejected": -3.55295467376709, "loss": 0.8448, "nll_loss": 0.8049901723861694, "rewards/accuracies": 0.875, "rewards/chosen": -0.21229799091815948, "rewards/margins": 0.14299747347831726, "rewards/rejected": -0.35529544949531555, "step": 1236 }, { "epoch": 3.3867214236824092, "grad_norm": 2.346449136734009, "learning_rate": 8.305479452054795e-07, "log_odds_chosen": 1.3288803100585938, "log_odds_ratio": -0.3954463601112366, "logits/chosen": 0.8768627047538757, "logits/rejected": 0.8359328508377075, "logps/chosen": -2.396672248840332, "logps/rejected": -3.6588492393493652, "loss": 0.8736, "nll_loss": 0.834078311920166, "rewards/accuracies": 0.75, "rewards/chosen": -0.23966723680496216, "rewards/margins": 0.12621767818927765, "rewards/rejected": -0.3658849000930786, "step": 1237 }, { "epoch": 3.3894592744695413, "grad_norm": 3.588554620742798, "learning_rate": 8.304109589041095e-07, "log_odds_chosen": 1.08683180809021, "log_odds_ratio": -0.377911239862442, "logits/chosen": 0.7789319157600403, "logits/rejected": 0.7821810841560364, "logps/chosen": -3.1090316772460938, "logps/rejected": -4.108829975128174, "loss": 0.8324, "nll_loss": 0.7946386337280273, "rewards/accuracies": 0.875, "rewards/chosen": -0.3109032213687897, "rewards/margins": 0.09997981786727905, "rewards/rejected": -0.4108830392360687, "step": 1238 }, { "epoch": 3.3921971252566734, "grad_norm": 2.005687713623047, "learning_rate": 8.302739726027397e-07, "log_odds_chosen": 1.1139841079711914, "log_odds_ratio": -0.36152586340904236, "logits/chosen": 0.7961388230323792, "logits/rejected": 0.7712677121162415, "logps/chosen": -1.8958096504211426, "logps/rejected": -2.9266486167907715, "loss": 0.7958, "nll_loss": 0.7596966028213501, "rewards/accuracies": 0.875, "rewards/chosen": -0.18958094716072083, "rewards/margins": 0.10308392345905304, "rewards/rejected": -0.29266488552093506, "step": 1239 }, { "epoch": 3.3949349760438055, "grad_norm": 2.1370010375976562, "learning_rate": 8.301369863013699e-07, "log_odds_chosen": 0.9104839563369751, "log_odds_ratio": -0.36922287940979004, "logits/chosen": 0.8525348901748657, "logits/rejected": 0.8222657442092896, "logps/chosen": -2.12730073928833, "logps/rejected": -2.9458107948303223, "loss": 0.8346, "nll_loss": 0.7976847887039185, "rewards/accuracies": 0.875, "rewards/chosen": -0.2127300649881363, "rewards/margins": 0.0818510353565216, "rewards/rejected": -0.2945810854434967, "step": 1240 }, { "epoch": 3.3976728268309375, "grad_norm": 2.222529888153076, "learning_rate": 8.299999999999999e-07, "log_odds_chosen": 1.3169620037078857, "log_odds_ratio": -0.30170291662216187, "logits/chosen": 1.0226526260375977, "logits/rejected": 1.0821808576583862, "logps/chosen": -2.079237937927246, "logps/rejected": -3.2805542945861816, "loss": 0.7063, "nll_loss": 0.6761288642883301, "rewards/accuracies": 0.875, "rewards/chosen": -0.2079237997531891, "rewards/margins": 0.12013164162635803, "rewards/rejected": -0.3280554413795471, "step": 1241 }, { "epoch": 3.40041067761807, "grad_norm": 3.6546530723571777, "learning_rate": 8.298630136986301e-07, "log_odds_chosen": 1.3421744108200073, "log_odds_ratio": -0.4043877124786377, "logits/chosen": 1.08108389377594, "logits/rejected": 1.1369438171386719, "logps/chosen": -2.790104627609253, "logps/rejected": -4.053564071655273, "loss": 0.8848, "nll_loss": 0.8443794250488281, "rewards/accuracies": 0.625, "rewards/chosen": -0.27901047468185425, "rewards/margins": 0.1263459175825119, "rewards/rejected": -0.40535637736320496, "step": 1242 }, { "epoch": 3.403148528405202, "grad_norm": 3.018545389175415, "learning_rate": 8.297260273972603e-07, "log_odds_chosen": -0.19785934686660767, "log_odds_ratio": -1.0224802494049072, "logits/chosen": 0.7184044122695923, "logits/rejected": 0.6865323185920715, "logps/chosen": -3.104365587234497, "logps/rejected": -2.812291145324707, "loss": 0.9486, "nll_loss": 0.8463922142982483, "rewards/accuracies": 0.625, "rewards/chosen": -0.31043654680252075, "rewards/margins": -0.02920745685696602, "rewards/rejected": -0.28122907876968384, "step": 1243 }, { "epoch": 3.405886379192334, "grad_norm": 2.037609815597534, "learning_rate": 8.295890410958903e-07, "log_odds_chosen": 2.106537342071533, "log_odds_ratio": -0.35183262825012207, "logits/chosen": 0.5193108916282654, "logits/rejected": 0.4803316593170166, "logps/chosen": -1.739093542098999, "logps/rejected": -3.6718549728393555, "loss": 0.8693, "nll_loss": 0.8340811729431152, "rewards/accuracies": 0.875, "rewards/chosen": -0.17390936613082886, "rewards/margins": 0.19327613711357117, "rewards/rejected": -0.3671855032444, "step": 1244 }, { "epoch": 3.4086242299794662, "grad_norm": 2.196171283721924, "learning_rate": 8.294520547945205e-07, "log_odds_chosen": 1.3106212615966797, "log_odds_ratio": -0.3431777358055115, "logits/chosen": 0.7286394834518433, "logits/rejected": 0.7255004644393921, "logps/chosen": -1.770395278930664, "logps/rejected": -2.8846335411071777, "loss": 0.763, "nll_loss": 0.7286980748176575, "rewards/accuracies": 0.875, "rewards/chosen": -0.17703953385353088, "rewards/margins": 0.11142385751008987, "rewards/rejected": -0.28846338391304016, "step": 1245 }, { "epoch": 3.4113620807665983, "grad_norm": 2.1805315017700195, "learning_rate": 8.293150684931507e-07, "log_odds_chosen": 1.9001576900482178, "log_odds_ratio": -0.23124349117279053, "logits/chosen": 0.8880255222320557, "logits/rejected": 0.8869091272354126, "logps/chosen": -3.1535704135894775, "logps/rejected": -4.950113773345947, "loss": 0.877, "nll_loss": 0.8538299798965454, "rewards/accuracies": 1.0, "rewards/chosen": -0.3153570294380188, "rewards/margins": 0.17965435981750488, "rewards/rejected": -0.4950113892555237, "step": 1246 }, { "epoch": 3.4140999315537304, "grad_norm": 3.012321710586548, "learning_rate": 8.291780821917808e-07, "log_odds_chosen": 1.3271936178207397, "log_odds_ratio": -0.5532675981521606, "logits/chosen": 0.5627219676971436, "logits/rejected": 0.491659939289093, "logps/chosen": -2.316188335418701, "logps/rejected": -3.5412240028381348, "loss": 0.8461, "nll_loss": 0.7908177971839905, "rewards/accuracies": 0.875, "rewards/chosen": -0.23161882162094116, "rewards/margins": 0.12250355631113052, "rewards/rejected": -0.3541224002838135, "step": 1247 }, { "epoch": 3.4168377823408624, "grad_norm": 3.61415958404541, "learning_rate": 8.290410958904109e-07, "log_odds_chosen": 0.874326765537262, "log_odds_ratio": -0.518968403339386, "logits/chosen": 0.8511185646057129, "logits/rejected": 0.830352783203125, "logps/chosen": -2.152770519256592, "logps/rejected": -2.872257709503174, "loss": 0.8173, "nll_loss": 0.7654380202293396, "rewards/accuracies": 0.875, "rewards/chosen": -0.2152770459651947, "rewards/margins": 0.07194875180721283, "rewards/rejected": -0.28722578287124634, "step": 1248 }, { "epoch": 3.4195756331279945, "grad_norm": 2.7230398654937744, "learning_rate": 8.289041095890411e-07, "log_odds_chosen": 1.2730557918548584, "log_odds_ratio": -0.4484260678291321, "logits/chosen": 0.6256023645401001, "logits/rejected": 0.6106564402580261, "logps/chosen": -1.821077585220337, "logps/rejected": -2.972275972366333, "loss": 0.8838, "nll_loss": 0.838984489440918, "rewards/accuracies": 0.875, "rewards/chosen": -0.18210777640342712, "rewards/margins": 0.1151198148727417, "rewards/rejected": -0.2972275912761688, "step": 1249 }, { "epoch": 3.4223134839151266, "grad_norm": 2.3557987213134766, "learning_rate": 8.287671232876712e-07, "log_odds_chosen": 1.8838316202163696, "log_odds_ratio": -0.2343878298997879, "logits/chosen": 0.809391975402832, "logits/rejected": 0.7490387558937073, "logps/chosen": -1.7339048385620117, "logps/rejected": -3.440974712371826, "loss": 0.7531, "nll_loss": 0.7296152114868164, "rewards/accuracies": 0.875, "rewards/chosen": -0.1733904778957367, "rewards/margins": 0.17070698738098145, "rewards/rejected": -0.34409746527671814, "step": 1250 }, { "epoch": 3.4250513347022586, "grad_norm": 2.4534823894500732, "learning_rate": 8.286301369863013e-07, "log_odds_chosen": 1.0400090217590332, "log_odds_ratio": -0.4630575180053711, "logits/chosen": 0.8197434544563293, "logits/rejected": 0.7544080018997192, "logps/chosen": -2.1793787479400635, "logps/rejected": -3.1446352005004883, "loss": 0.89, "nll_loss": 0.8436585068702698, "rewards/accuracies": 0.625, "rewards/chosen": -0.2179378867149353, "rewards/margins": 0.0965256541967392, "rewards/rejected": -0.3144635260105133, "step": 1251 }, { "epoch": 3.4277891854893907, "grad_norm": 2.420848846435547, "learning_rate": 8.284931506849314e-07, "log_odds_chosen": 1.1968133449554443, "log_odds_ratio": -0.37604913115501404, "logits/chosen": 0.738876223564148, "logits/rejected": 0.7057523131370544, "logps/chosen": -2.0660343170166016, "logps/rejected": -3.126511335372925, "loss": 0.7866, "nll_loss": 0.7490071058273315, "rewards/accuracies": 0.75, "rewards/chosen": -0.20660343766212463, "rewards/margins": 0.10604768991470337, "rewards/rejected": -0.312651127576828, "step": 1252 }, { "epoch": 3.430527036276523, "grad_norm": 2.9334776401519775, "learning_rate": 8.283561643835616e-07, "log_odds_chosen": 0.7859593629837036, "log_odds_ratio": -0.48822349309921265, "logits/chosen": 0.7652665376663208, "logits/rejected": 0.7437883019447327, "logps/chosen": -2.203979253768921, "logps/rejected": -2.8876256942749023, "loss": 0.7915, "nll_loss": 0.742672324180603, "rewards/accuracies": 0.75, "rewards/chosen": -0.2203979343175888, "rewards/margins": 0.06836463510990143, "rewards/rejected": -0.28876256942749023, "step": 1253 }, { "epoch": 3.433264887063655, "grad_norm": 2.4145290851593018, "learning_rate": 8.282191780821918e-07, "log_odds_chosen": 0.898912787437439, "log_odds_ratio": -0.404106080532074, "logits/chosen": 0.5430599451065063, "logits/rejected": 0.4607906937599182, "logps/chosen": -1.7281005382537842, "logps/rejected": -2.48608136177063, "loss": 0.8964, "nll_loss": 0.8560137152671814, "rewards/accuracies": 0.875, "rewards/chosen": -0.17281004786491394, "rewards/margins": 0.07579807192087173, "rewards/rejected": -0.24860814213752747, "step": 1254 }, { "epoch": 3.436002737850787, "grad_norm": 3.778299570083618, "learning_rate": 8.280821917808218e-07, "log_odds_chosen": 0.19564758241176605, "log_odds_ratio": -0.7182397842407227, "logits/chosen": 0.5819387435913086, "logits/rejected": 0.504424512386322, "logps/chosen": -1.8865118026733398, "logps/rejected": -2.0327255725860596, "loss": 0.8999, "nll_loss": 0.8280968070030212, "rewards/accuracies": 0.75, "rewards/chosen": -0.1886512041091919, "rewards/margins": 0.01462137047201395, "rewards/rejected": -0.20327255129814148, "step": 1255 }, { "epoch": 3.4387405886379194, "grad_norm": 2.068707227706909, "learning_rate": 8.27945205479452e-07, "log_odds_chosen": 2.7267844676971436, "log_odds_ratio": -0.266546368598938, "logits/chosen": 1.0465924739837646, "logits/rejected": 1.0738158226013184, "logps/chosen": -2.2619965076446533, "logps/rejected": -4.869300842285156, "loss": 0.7707, "nll_loss": 0.7440636157989502, "rewards/accuracies": 0.875, "rewards/chosen": -0.2261996567249298, "rewards/margins": 0.26073044538497925, "rewards/rejected": -0.48693007230758667, "step": 1256 }, { "epoch": 3.4414784394250515, "grad_norm": 3.319751262664795, "learning_rate": 8.278082191780822e-07, "log_odds_chosen": 1.7658079862594604, "log_odds_ratio": -0.2397708296775818, "logits/chosen": 0.8535101413726807, "logits/rejected": 0.8518271446228027, "logps/chosen": -2.287409782409668, "logps/rejected": -3.919039249420166, "loss": 0.9183, "nll_loss": 0.8942817449569702, "rewards/accuracies": 0.875, "rewards/chosen": -0.22874096035957336, "rewards/margins": 0.1631629765033722, "rewards/rejected": -0.39190393686294556, "step": 1257 }, { "epoch": 3.4442162902121836, "grad_norm": 1.9607161283493042, "learning_rate": 8.276712328767122e-07, "log_odds_chosen": 1.3711354732513428, "log_odds_ratio": -0.35700762271881104, "logits/chosen": 0.7806969881057739, "logits/rejected": 0.7726144790649414, "logps/chosen": -1.771597981452942, "logps/rejected": -3.041382312774658, "loss": 0.8454, "nll_loss": 0.8096495866775513, "rewards/accuracies": 0.875, "rewards/chosen": -0.17715978622436523, "rewards/margins": 0.12697844207286835, "rewards/rejected": -0.3041382431983948, "step": 1258 }, { "epoch": 3.4469541409993156, "grad_norm": 2.632216215133667, "learning_rate": 8.275342465753424e-07, "log_odds_chosen": 1.0436772108078003, "log_odds_ratio": -0.47662633657455444, "logits/chosen": 0.9919990301132202, "logits/rejected": 0.956932783126831, "logps/chosen": -2.7192788124084473, "logps/rejected": -3.684509515762329, "loss": 0.8488, "nll_loss": 0.8010959029197693, "rewards/accuracies": 0.875, "rewards/chosen": -0.2719278931617737, "rewards/margins": 0.09652309119701385, "rewards/rejected": -0.36845099925994873, "step": 1259 }, { "epoch": 3.4496919917864477, "grad_norm": 2.648003101348877, "learning_rate": 8.273972602739726e-07, "log_odds_chosen": 0.8078386187553406, "log_odds_ratio": -0.5049073696136475, "logits/chosen": 0.9067462086677551, "logits/rejected": 0.905649721622467, "logps/chosen": -2.548353910446167, "logps/rejected": -3.2929558753967285, "loss": 0.8234, "nll_loss": 0.7729470729827881, "rewards/accuracies": 0.75, "rewards/chosen": -0.2548353672027588, "rewards/margins": 0.07446019351482391, "rewards/rejected": -0.3292956054210663, "step": 1260 }, { "epoch": 3.4524298425735798, "grad_norm": 2.229628324508667, "learning_rate": 8.272602739726027e-07, "log_odds_chosen": 1.4860005378723145, "log_odds_ratio": -0.2490578591823578, "logits/chosen": 0.9234822392463684, "logits/rejected": 0.9367572069168091, "logps/chosen": -1.8730299472808838, "logps/rejected": -3.2105979919433594, "loss": 0.7055, "nll_loss": 0.680567741394043, "rewards/accuracies": 1.0, "rewards/chosen": -0.18730300664901733, "rewards/margins": 0.1337568163871765, "rewards/rejected": -0.32105982303619385, "step": 1261 }, { "epoch": 3.455167693360712, "grad_norm": 2.432957172393799, "learning_rate": 8.271232876712328e-07, "log_odds_chosen": 1.9508450031280518, "log_odds_ratio": -0.25218069553375244, "logits/chosen": 0.5576062798500061, "logits/rejected": 0.5190932154655457, "logps/chosen": -1.5962696075439453, "logps/rejected": -3.350313663482666, "loss": 0.821, "nll_loss": 0.7957503199577332, "rewards/accuracies": 0.875, "rewards/chosen": -0.15962697565555573, "rewards/margins": 0.17540442943572998, "rewards/rejected": -0.3350313901901245, "step": 1262 }, { "epoch": 3.457905544147844, "grad_norm": 2.327777624130249, "learning_rate": 8.26986301369863e-07, "log_odds_chosen": 0.7146440148353577, "log_odds_ratio": -0.452269047498703, "logits/chosen": 0.6382803320884705, "logits/rejected": 0.6567836999893188, "logps/chosen": -2.2150521278381348, "logps/rejected": -2.846245288848877, "loss": 0.8543, "nll_loss": 0.8090269565582275, "rewards/accuracies": 0.75, "rewards/chosen": -0.22150520980358124, "rewards/margins": 0.0631193295121193, "rewards/rejected": -0.28462454676628113, "step": 1263 }, { "epoch": 3.460643394934976, "grad_norm": 2.5656559467315674, "learning_rate": 8.268493150684931e-07, "log_odds_chosen": 0.17674005031585693, "log_odds_ratio": -0.660514235496521, "logits/chosen": 0.5750352740287781, "logits/rejected": 0.524946928024292, "logps/chosen": -2.3528409004211426, "logps/rejected": -2.503260374069214, "loss": 0.9076, "nll_loss": 0.841575026512146, "rewards/accuracies": 0.625, "rewards/chosen": -0.23528407514095306, "rewards/margins": 0.015041966922581196, "rewards/rejected": -0.2503260374069214, "step": 1264 }, { "epoch": 3.463381245722108, "grad_norm": 2.3039443492889404, "learning_rate": 8.267123287671232e-07, "log_odds_chosen": 0.9885016083717346, "log_odds_ratio": -0.34508469700813293, "logits/chosen": 0.8473895192146301, "logits/rejected": 0.8480726480484009, "logps/chosen": -1.850754976272583, "logps/rejected": -2.698065996170044, "loss": 0.9037, "nll_loss": 0.8691529035568237, "rewards/accuracies": 1.0, "rewards/chosen": -0.18507549166679382, "rewards/margins": 0.0847310945391655, "rewards/rejected": -0.2698065936565399, "step": 1265 }, { "epoch": 3.46611909650924, "grad_norm": 3.1652650833129883, "learning_rate": 8.265753424657533e-07, "log_odds_chosen": 0.8655803799629211, "log_odds_ratio": -0.5555886626243591, "logits/chosen": 0.751920759677887, "logits/rejected": 0.6868468523025513, "logps/chosen": -2.381319761276245, "logps/rejected": -3.131816864013672, "loss": 0.9271, "nll_loss": 0.8714988231658936, "rewards/accuracies": 0.875, "rewards/chosen": -0.23813198506832123, "rewards/margins": 0.07504969090223312, "rewards/rejected": -0.31318169832229614, "step": 1266 }, { "epoch": 3.468856947296372, "grad_norm": 4.430673122406006, "learning_rate": 8.264383561643835e-07, "log_odds_chosen": 1.0356318950653076, "log_odds_ratio": -0.5242565870285034, "logits/chosen": 0.846087634563446, "logits/rejected": 0.8580395579338074, "logps/chosen": -2.681033134460449, "logps/rejected": -3.6572139263153076, "loss": 0.9267, "nll_loss": 0.8742902874946594, "rewards/accuracies": 0.75, "rewards/chosen": -0.26810330152511597, "rewards/margins": 0.09761808812618256, "rewards/rejected": -0.3657214045524597, "step": 1267 }, { "epoch": 3.4715947980835047, "grad_norm": 3.442899227142334, "learning_rate": 8.263013698630137e-07, "log_odds_chosen": 1.6446177959442139, "log_odds_ratio": -0.289230078458786, "logits/chosen": 0.9845463037490845, "logits/rejected": 1.0211482048034668, "logps/chosen": -2.5695080757141113, "logps/rejected": -4.123910427093506, "loss": 0.8288, "nll_loss": 0.7998426556587219, "rewards/accuracies": 1.0, "rewards/chosen": -0.25695082545280457, "rewards/margins": 0.15544021129608154, "rewards/rejected": -0.4123910367488861, "step": 1268 }, { "epoch": 3.4743326488706368, "grad_norm": 2.2350099086761475, "learning_rate": 8.261643835616437e-07, "log_odds_chosen": 1.861644983291626, "log_odds_ratio": -0.2644125521183014, "logits/chosen": 0.5820354223251343, "logits/rejected": 0.5680118799209595, "logps/chosen": -2.047880172729492, "logps/rejected": -3.7564892768859863, "loss": 0.8256, "nll_loss": 0.7991988062858582, "rewards/accuracies": 0.875, "rewards/chosen": -0.20478801429271698, "rewards/margins": 0.1708609014749527, "rewards/rejected": -0.3756489157676697, "step": 1269 }, { "epoch": 3.477070499657769, "grad_norm": 1.9398947954177856, "learning_rate": 8.260273972602739e-07, "log_odds_chosen": 1.9226396083831787, "log_odds_ratio": -0.22825375199317932, "logits/chosen": 0.5495505332946777, "logits/rejected": 0.4785865843296051, "logps/chosen": -1.7394769191741943, "logps/rejected": -3.504657745361328, "loss": 0.8755, "nll_loss": 0.8527211546897888, "rewards/accuracies": 1.0, "rewards/chosen": -0.17394770681858063, "rewards/margins": 0.17651808261871338, "rewards/rejected": -0.3504657745361328, "step": 1270 }, { "epoch": 3.479808350444901, "grad_norm": 2.7666492462158203, "learning_rate": 8.258904109589041e-07, "log_odds_chosen": 0.6423715353012085, "log_odds_ratio": -0.6139419078826904, "logits/chosen": 0.7078778147697449, "logits/rejected": 0.704402506351471, "logps/chosen": -2.4498987197875977, "logps/rejected": -2.9688711166381836, "loss": 0.8057, "nll_loss": 0.7443538904190063, "rewards/accuracies": 0.625, "rewards/chosen": -0.24498988687992096, "rewards/margins": 0.051897235214710236, "rewards/rejected": -0.2968870997428894, "step": 1271 }, { "epoch": 3.482546201232033, "grad_norm": 1.9936109781265259, "learning_rate": 8.257534246575341e-07, "log_odds_chosen": 1.521902322769165, "log_odds_ratio": -0.3331688642501831, "logits/chosen": 0.7043697237968445, "logits/rejected": 0.6648871302604675, "logps/chosen": -1.9452944993972778, "logps/rejected": -3.2955541610717773, "loss": 0.8631, "nll_loss": 0.829783022403717, "rewards/accuracies": 0.875, "rewards/chosen": -0.1945294439792633, "rewards/margins": 0.13502594828605652, "rewards/rejected": -0.3295553922653198, "step": 1272 }, { "epoch": 3.485284052019165, "grad_norm": 3.552649736404419, "learning_rate": 8.256164383561643e-07, "log_odds_chosen": 0.6434583067893982, "log_odds_ratio": -0.545795202255249, "logits/chosen": 1.0842351913452148, "logits/rejected": 1.0547184944152832, "logps/chosen": -2.441983938217163, "logps/rejected": -3.049114465713501, "loss": 0.8839, "nll_loss": 0.8292725682258606, "rewards/accuracies": 0.625, "rewards/chosen": -0.24419838190078735, "rewards/margins": 0.06071306765079498, "rewards/rejected": -0.30491143465042114, "step": 1273 }, { "epoch": 3.488021902806297, "grad_norm": 2.3906729221343994, "learning_rate": 8.254794520547945e-07, "log_odds_chosen": 1.6822445392608643, "log_odds_ratio": -0.2627629041671753, "logits/chosen": 1.150090217590332, "logits/rejected": 1.164180040359497, "logps/chosen": -2.0552523136138916, "logps/rejected": -3.5356178283691406, "loss": 0.8048, "nll_loss": 0.7785055041313171, "rewards/accuracies": 1.0, "rewards/chosen": -0.2055252343416214, "rewards/margins": 0.14803656935691833, "rewards/rejected": -0.3535618185997009, "step": 1274 }, { "epoch": 3.490759753593429, "grad_norm": 2.3232250213623047, "learning_rate": 8.253424657534246e-07, "log_odds_chosen": 0.21612706780433655, "log_odds_ratio": -0.6330969333648682, "logits/chosen": 0.508019208908081, "logits/rejected": 0.5171377658843994, "logps/chosen": -2.001126766204834, "logps/rejected": -2.1843767166137695, "loss": 0.9309, "nll_loss": 0.8676015734672546, "rewards/accuracies": 0.625, "rewards/chosen": -0.20011268556118011, "rewards/margins": 0.018325015902519226, "rewards/rejected": -0.21843770146369934, "step": 1275 }, { "epoch": 3.4934976043805612, "grad_norm": 2.6091527938842773, "learning_rate": 8.252054794520547e-07, "log_odds_chosen": 0.7679307460784912, "log_odds_ratio": -0.4319790005683899, "logits/chosen": 0.5802574157714844, "logits/rejected": 0.5271294116973877, "logps/chosen": -2.3228859901428223, "logps/rejected": -3.017498254776001, "loss": 0.8808, "nll_loss": 0.8375799655914307, "rewards/accuracies": 0.875, "rewards/chosen": -0.23228858411312103, "rewards/margins": 0.06946124136447906, "rewards/rejected": -0.3017498254776001, "step": 1276 }, { "epoch": 3.4962354551676933, "grad_norm": 2.0554115772247314, "learning_rate": 8.250684931506849e-07, "log_odds_chosen": 1.5718328952789307, "log_odds_ratio": -0.24804723262786865, "logits/chosen": 0.7440277934074402, "logits/rejected": 0.7393561601638794, "logps/chosen": -2.5664572715759277, "logps/rejected": -3.9777095317840576, "loss": 0.8591, "nll_loss": 0.8343001008033752, "rewards/accuracies": 1.0, "rewards/chosen": -0.25664573907852173, "rewards/margins": 0.14112520217895508, "rewards/rejected": -0.3977709412574768, "step": 1277 }, { "epoch": 3.4989733059548254, "grad_norm": 2.300654172897339, "learning_rate": 8.24931506849315e-07, "log_odds_chosen": 1.083459734916687, "log_odds_ratio": -0.338183730840683, "logits/chosen": 0.8490296602249146, "logits/rejected": 0.8504325151443481, "logps/chosen": -1.9646143913269043, "logps/rejected": -2.926589250564575, "loss": 0.7621, "nll_loss": 0.7282575368881226, "rewards/accuracies": 0.875, "rewards/chosen": -0.19646145403385162, "rewards/margins": 0.09619749337434769, "rewards/rejected": -0.2926589250564575, "step": 1278 }, { "epoch": 3.5017111567419574, "grad_norm": 2.265087366104126, "learning_rate": 8.247945205479451e-07, "log_odds_chosen": 0.8482575416564941, "log_odds_ratio": -0.5410950779914856, "logits/chosen": 0.6470237970352173, "logits/rejected": 0.6404841542243958, "logps/chosen": -1.7787106037139893, "logps/rejected": -2.5891237258911133, "loss": 0.8969, "nll_loss": 0.8428025245666504, "rewards/accuracies": 0.75, "rewards/chosen": -0.17787104845046997, "rewards/margins": 0.08104130625724792, "rewards/rejected": -0.2589123547077179, "step": 1279 }, { "epoch": 3.5044490075290895, "grad_norm": 2.787665843963623, "learning_rate": 8.246575342465753e-07, "log_odds_chosen": 1.183414340019226, "log_odds_ratio": -0.35374748706817627, "logits/chosen": 0.3650756776332855, "logits/rejected": 0.3181651830673218, "logps/chosen": -2.159674882888794, "logps/rejected": -3.227015972137451, "loss": 0.9077, "nll_loss": 0.8723532557487488, "rewards/accuracies": 1.0, "rewards/chosen": -0.21596749126911163, "rewards/margins": 0.10673411935567856, "rewards/rejected": -0.3227016031742096, "step": 1280 }, { "epoch": 3.5071868583162216, "grad_norm": 2.2516183853149414, "learning_rate": 8.245205479452054e-07, "log_odds_chosen": 1.4717133045196533, "log_odds_ratio": -0.4568895101547241, "logits/chosen": 0.8220527172088623, "logits/rejected": 0.7811391353607178, "logps/chosen": -2.531120777130127, "logps/rejected": -3.9878997802734375, "loss": 0.912, "nll_loss": 0.8663159608840942, "rewards/accuracies": 0.75, "rewards/chosen": -0.2531120777130127, "rewards/margins": 0.14567790925502777, "rewards/rejected": -0.3987899720668793, "step": 1281 }, { "epoch": 3.5099247091033536, "grad_norm": 1.908319354057312, "learning_rate": 8.243835616438356e-07, "log_odds_chosen": 1.452762484550476, "log_odds_ratio": -0.24880963563919067, "logits/chosen": 0.6129873991012573, "logits/rejected": 0.5327731966972351, "logps/chosen": -1.622341275215149, "logps/rejected": -2.9086084365844727, "loss": 0.8419, "nll_loss": 0.8170033693313599, "rewards/accuracies": 1.0, "rewards/chosen": -0.1622341424226761, "rewards/margins": 0.12862670421600342, "rewards/rejected": -0.2908608615398407, "step": 1282 }, { "epoch": 3.5126625598904857, "grad_norm": 3.5277507305145264, "learning_rate": 8.242465753424656e-07, "log_odds_chosen": 0.6795019507408142, "log_odds_ratio": -0.6538563966751099, "logits/chosen": 1.009148359298706, "logits/rejected": 1.0462431907653809, "logps/chosen": -3.408994674682617, "logps/rejected": -4.034535884857178, "loss": 0.8344, "nll_loss": 0.7689847946166992, "rewards/accuracies": 0.75, "rewards/chosen": -0.34089943766593933, "rewards/margins": 0.06255413591861725, "rewards/rejected": -0.4034535884857178, "step": 1283 }, { "epoch": 3.515400410677618, "grad_norm": 3.2487967014312744, "learning_rate": 8.241095890410958e-07, "log_odds_chosen": 0.7984156012535095, "log_odds_ratio": -0.5828573703765869, "logits/chosen": 0.8671538829803467, "logits/rejected": 0.9440411329269409, "logps/chosen": -2.934670925140381, "logps/rejected": -3.6995162963867188, "loss": 0.9387, "nll_loss": 0.8804425001144409, "rewards/accuracies": 0.625, "rewards/chosen": -0.29346710443496704, "rewards/margins": 0.0764845535159111, "rewards/rejected": -0.36995166540145874, "step": 1284 }, { "epoch": 3.5181382614647503, "grad_norm": 1.8928028345108032, "learning_rate": 8.23972602739726e-07, "log_odds_chosen": 1.5277247428894043, "log_odds_ratio": -0.26164156198501587, "logits/chosen": 0.8886783123016357, "logits/rejected": 0.8871265649795532, "logps/chosen": -1.6152803897857666, "logps/rejected": -2.960446834564209, "loss": 0.7518, "nll_loss": 0.7255983948707581, "rewards/accuracies": 1.0, "rewards/chosen": -0.16152805089950562, "rewards/margins": 0.1345166265964508, "rewards/rejected": -0.2960446774959564, "step": 1285 }, { "epoch": 3.5208761122518824, "grad_norm": 2.66265606880188, "learning_rate": 8.23835616438356e-07, "log_odds_chosen": 1.689116358757019, "log_odds_ratio": -0.3020448088645935, "logits/chosen": 1.071631908416748, "logits/rejected": 1.1108416318893433, "logps/chosen": -2.3525309562683105, "logps/rejected": -3.8113532066345215, "loss": 0.6953, "nll_loss": 0.6651392579078674, "rewards/accuracies": 0.875, "rewards/chosen": -0.23525311052799225, "rewards/margins": 0.14588221907615662, "rewards/rejected": -0.38113534450531006, "step": 1286 }, { "epoch": 3.5236139630390144, "grad_norm": 2.3219428062438965, "learning_rate": 8.236986301369862e-07, "log_odds_chosen": 0.9567062854766846, "log_odds_ratio": -0.38939958810806274, "logits/chosen": 0.6829155683517456, "logits/rejected": 0.5495319366455078, "logps/chosen": -1.3925611972808838, "logps/rejected": -2.187856435775757, "loss": 0.9358, "nll_loss": 0.8968181610107422, "rewards/accuracies": 1.0, "rewards/chosen": -0.13925610482692719, "rewards/margins": 0.0795295462012291, "rewards/rejected": -0.21878565847873688, "step": 1287 }, { "epoch": 3.5263518138261465, "grad_norm": 2.8283605575561523, "learning_rate": 8.235616438356165e-07, "log_odds_chosen": 0.3861645758152008, "log_odds_ratio": -0.5618773698806763, "logits/chosen": 0.634259045124054, "logits/rejected": 0.5795947313308716, "logps/chosen": -3.1849143505096436, "logps/rejected": -3.5473461151123047, "loss": 0.9315, "nll_loss": 0.8752961158752441, "rewards/accuracies": 0.75, "rewards/chosen": -0.3184914290904999, "rewards/margins": 0.03624318167567253, "rewards/rejected": -0.3547345995903015, "step": 1288 }, { "epoch": 3.5290896646132786, "grad_norm": 3.1144051551818848, "learning_rate": 8.234246575342466e-07, "log_odds_chosen": 0.9200620651245117, "log_odds_ratio": -0.5301854610443115, "logits/chosen": 0.8365719318389893, "logits/rejected": 0.8843775987625122, "logps/chosen": -2.5985069274902344, "logps/rejected": -3.470825433731079, "loss": 0.7958, "nll_loss": 0.7427853941917419, "rewards/accuracies": 0.75, "rewards/chosen": -0.25985071063041687, "rewards/margins": 0.08723187446594238, "rewards/rejected": -0.34708258509635925, "step": 1289 }, { "epoch": 3.5318275154004106, "grad_norm": 3.020069122314453, "learning_rate": 8.232876712328767e-07, "log_odds_chosen": 0.200045645236969, "log_odds_ratio": -0.7579836249351501, "logits/chosen": 0.937840461730957, "logits/rejected": 0.8832215666770935, "logps/chosen": -2.0973119735717773, "logps/rejected": -2.1885037422180176, "loss": 0.9037, "nll_loss": 0.8279082179069519, "rewards/accuracies": 0.875, "rewards/chosen": -0.20973122119903564, "rewards/margins": 0.009119178168475628, "rewards/rejected": -0.21885037422180176, "step": 1290 }, { "epoch": 3.5345653661875427, "grad_norm": 2.022233247756958, "learning_rate": 8.231506849315069e-07, "log_odds_chosen": 1.537630558013916, "log_odds_ratio": -0.2435177117586136, "logits/chosen": 0.9436839818954468, "logits/rejected": 0.7983707785606384, "logps/chosen": -2.259902000427246, "logps/rejected": -3.675184488296509, "loss": 0.8544, "nll_loss": 0.8300603628158569, "rewards/accuracies": 1.0, "rewards/chosen": -0.2259902060031891, "rewards/margins": 0.14152826368808746, "rewards/rejected": -0.36751848459243774, "step": 1291 }, { "epoch": 3.5373032169746748, "grad_norm": 2.968757390975952, "learning_rate": 8.23013698630137e-07, "log_odds_chosen": 0.733678936958313, "log_odds_ratio": -0.43498533964157104, "logits/chosen": 0.585045576095581, "logits/rejected": 0.5224371552467346, "logps/chosen": -1.962435245513916, "logps/rejected": -2.591899871826172, "loss": 0.8916, "nll_loss": 0.8480642437934875, "rewards/accuracies": 0.875, "rewards/chosen": -0.1962435245513916, "rewards/margins": 0.06294646114110947, "rewards/rejected": -0.25918999314308167, "step": 1292 }, { "epoch": 3.540041067761807, "grad_norm": 2.183429002761841, "learning_rate": 8.228767123287671e-07, "log_odds_chosen": 1.8643237352371216, "log_odds_ratio": -0.30951938033103943, "logits/chosen": 1.0215498208999634, "logits/rejected": 0.9985499382019043, "logps/chosen": -2.2591021060943604, "logps/rejected": -4.011929512023926, "loss": 0.7634, "nll_loss": 0.7324002981185913, "rewards/accuracies": 1.0, "rewards/chosen": -0.2259102165699005, "rewards/margins": 0.17528271675109863, "rewards/rejected": -0.40119293332099915, "step": 1293 }, { "epoch": 3.5427789185489393, "grad_norm": 2.3350796699523926, "learning_rate": 8.227397260273973e-07, "log_odds_chosen": 0.6147553324699402, "log_odds_ratio": -0.47559383511543274, "logits/chosen": 0.6986587047576904, "logits/rejected": 0.7035121917724609, "logps/chosen": -1.7571576833724976, "logps/rejected": -2.289307117462158, "loss": 0.9371, "nll_loss": 0.8895573019981384, "rewards/accuracies": 0.875, "rewards/chosen": -0.17571577429771423, "rewards/margins": 0.05321495980024338, "rewards/rejected": -0.22893072664737701, "step": 1294 }, { "epoch": 3.5455167693360714, "grad_norm": 2.2779006958007812, "learning_rate": 8.226027397260274e-07, "log_odds_chosen": 2.3453309535980225, "log_odds_ratio": -0.1427188366651535, "logits/chosen": 0.6357036232948303, "logits/rejected": 0.6408465504646301, "logps/chosen": -2.020423173904419, "logps/rejected": -4.204775333404541, "loss": 0.8292, "nll_loss": 0.8149086833000183, "rewards/accuracies": 1.0, "rewards/chosen": -0.2020423263311386, "rewards/margins": 0.21843525767326355, "rewards/rejected": -0.42047756910324097, "step": 1295 }, { "epoch": 3.5482546201232035, "grad_norm": 2.560946226119995, "learning_rate": 8.224657534246576e-07, "log_odds_chosen": 1.6775414943695068, "log_odds_ratio": -0.3429262936115265, "logits/chosen": 1.01500403881073, "logits/rejected": 1.0205665826797485, "logps/chosen": -2.515810966491699, "logps/rejected": -4.136472702026367, "loss": 0.9366, "nll_loss": 0.9022874236106873, "rewards/accuracies": 0.75, "rewards/chosen": -0.251581072807312, "rewards/margins": 0.16206622123718262, "rewards/rejected": -0.41364729404449463, "step": 1296 }, { "epoch": 3.5509924709103355, "grad_norm": 3.1821441650390625, "learning_rate": 8.223287671232876e-07, "log_odds_chosen": 0.5847973823547363, "log_odds_ratio": -0.5794916152954102, "logits/chosen": 1.0311914682388306, "logits/rejected": 0.9684929847717285, "logps/chosen": -2.149775505065918, "logps/rejected": -2.6560301780700684, "loss": 0.778, "nll_loss": 0.7200547456741333, "rewards/accuracies": 0.625, "rewards/chosen": -0.21497754752635956, "rewards/margins": 0.05062549561262131, "rewards/rejected": -0.26560303568840027, "step": 1297 }, { "epoch": 3.5537303216974676, "grad_norm": 2.273700714111328, "learning_rate": 8.221917808219178e-07, "log_odds_chosen": 1.2309613227844238, "log_odds_ratio": -0.39883485436439514, "logits/chosen": 0.6739688515663147, "logits/rejected": 0.5792591571807861, "logps/chosen": -1.4230389595031738, "logps/rejected": -2.5354530811309814, "loss": 0.9079, "nll_loss": 0.8680132627487183, "rewards/accuracies": 1.0, "rewards/chosen": -0.14230388402938843, "rewards/margins": 0.111241415143013, "rewards/rejected": -0.2535453140735626, "step": 1298 }, { "epoch": 3.5564681724845997, "grad_norm": 2.235891103744507, "learning_rate": 8.22054794520548e-07, "log_odds_chosen": 1.0987728834152222, "log_odds_ratio": -0.32023531198501587, "logits/chosen": 0.706619143486023, "logits/rejected": 0.6610256433486938, "logps/chosen": -1.6608285903930664, "logps/rejected": -2.6154093742370605, "loss": 0.8685, "nll_loss": 0.8364837169647217, "rewards/accuracies": 1.0, "rewards/chosen": -0.16608285903930664, "rewards/margins": 0.09545806795358658, "rewards/rejected": -0.2615409195423126, "step": 1299 }, { "epoch": 3.5592060232717317, "grad_norm": 2.6131603717803955, "learning_rate": 8.21917808219178e-07, "log_odds_chosen": 0.8699833154678345, "log_odds_ratio": -0.4398592710494995, "logits/chosen": 1.108511209487915, "logits/rejected": 1.09761643409729, "logps/chosen": -1.891376256942749, "logps/rejected": -2.6515145301818848, "loss": 0.8573, "nll_loss": 0.8133413791656494, "rewards/accuracies": 0.75, "rewards/chosen": -0.18913763761520386, "rewards/margins": 0.07601384073495865, "rewards/rejected": -0.2651514708995819, "step": 1300 }, { "epoch": 3.561943874058864, "grad_norm": 2.786437511444092, "learning_rate": 8.217808219178082e-07, "log_odds_chosen": 1.7125025987625122, "log_odds_ratio": -0.3674604594707489, "logits/chosen": 1.1108083724975586, "logits/rejected": 1.084302544593811, "logps/chosen": -2.5180273056030273, "logps/rejected": -4.070000648498535, "loss": 0.8109, "nll_loss": 0.774196982383728, "rewards/accuracies": 0.875, "rewards/chosen": -0.2518027126789093, "rewards/margins": 0.1551973521709442, "rewards/rejected": -0.4070000648498535, "step": 1301 }, { "epoch": 3.564681724845996, "grad_norm": 2.5490691661834717, "learning_rate": 8.216438356164384e-07, "log_odds_chosen": 0.7431935667991638, "log_odds_ratio": -0.46466684341430664, "logits/chosen": 0.7882999777793884, "logits/rejected": 0.8146393895149231, "logps/chosen": -2.0678014755249023, "logps/rejected": -2.729321002960205, "loss": 0.8923, "nll_loss": 0.8458426594734192, "rewards/accuracies": 0.75, "rewards/chosen": -0.20678013563156128, "rewards/margins": 0.06615195423364639, "rewards/rejected": -0.2729320824146271, "step": 1302 }, { "epoch": 3.567419575633128, "grad_norm": 2.795318126678467, "learning_rate": 8.215068493150685e-07, "log_odds_chosen": 1.0560312271118164, "log_odds_ratio": -0.5439594984054565, "logits/chosen": 0.8606399297714233, "logits/rejected": 0.8860266208648682, "logps/chosen": -2.3887715339660645, "logps/rejected": -3.3523356914520264, "loss": 0.8929, "nll_loss": 0.8384813070297241, "rewards/accuracies": 0.75, "rewards/chosen": -0.23887714743614197, "rewards/margins": 0.09635642170906067, "rewards/rejected": -0.33523356914520264, "step": 1303 }, { "epoch": 3.57015742642026, "grad_norm": 2.1033456325531006, "learning_rate": 8.213698630136986e-07, "log_odds_chosen": 2.0198001861572266, "log_odds_ratio": -0.2852853536605835, "logits/chosen": 0.6782886385917664, "logits/rejected": 0.6412624716758728, "logps/chosen": -1.9524974822998047, "logps/rejected": -3.844841480255127, "loss": 0.7967, "nll_loss": 0.7681518793106079, "rewards/accuracies": 1.0, "rewards/chosen": -0.1952497363090515, "rewards/margins": 0.1892344206571579, "rewards/rejected": -0.3844841420650482, "step": 1304 }, { "epoch": 3.572895277207392, "grad_norm": 2.3051857948303223, "learning_rate": 8.212328767123288e-07, "log_odds_chosen": 0.6027922034263611, "log_odds_ratio": -0.5304517149925232, "logits/chosen": 0.8285690546035767, "logits/rejected": 0.8811949491500854, "logps/chosen": -2.395921230316162, "logps/rejected": -2.9438414573669434, "loss": 0.9557, "nll_loss": 0.902666449546814, "rewards/accuracies": 0.875, "rewards/chosen": -0.23959210515022278, "rewards/margins": 0.05479203164577484, "rewards/rejected": -0.2943841218948364, "step": 1305 }, { "epoch": 3.575633127994524, "grad_norm": 3.7285408973693848, "learning_rate": 8.210958904109589e-07, "log_odds_chosen": 0.7696546316146851, "log_odds_ratio": -0.6341570615768433, "logits/chosen": 0.6277993321418762, "logits/rejected": 0.6105331182479858, "logps/chosen": -2.427738666534424, "logps/rejected": -3.0852177143096924, "loss": 0.926, "nll_loss": 0.862570583820343, "rewards/accuracies": 0.75, "rewards/chosen": -0.2427738606929779, "rewards/margins": 0.06574790179729462, "rewards/rejected": -0.30852174758911133, "step": 1306 }, { "epoch": 3.578370978781656, "grad_norm": 2.550163507461548, "learning_rate": 8.20958904109589e-07, "log_odds_chosen": 0.4815358519554138, "log_odds_ratio": -0.6251411437988281, "logits/chosen": 0.8424127101898193, "logits/rejected": 0.8059905171394348, "logps/chosen": -2.5400965213775635, "logps/rejected": -2.9667649269104004, "loss": 0.8049, "nll_loss": 0.7423467636108398, "rewards/accuracies": 0.75, "rewards/chosen": -0.2540096640586853, "rewards/margins": 0.04266684874892235, "rewards/rejected": -0.29667651653289795, "step": 1307 }, { "epoch": 3.5811088295687883, "grad_norm": 1.979461669921875, "learning_rate": 8.208219178082192e-07, "log_odds_chosen": 1.3894505500793457, "log_odds_ratio": -0.3587256968021393, "logits/chosen": 0.6058880686759949, "logits/rejected": 0.5545961856842041, "logps/chosen": -1.4298560619354248, "logps/rejected": -2.604915142059326, "loss": 0.8483, "nll_loss": 0.8124584555625916, "rewards/accuracies": 0.875, "rewards/chosen": -0.14298561215400696, "rewards/margins": 0.11750590056180954, "rewards/rejected": -0.2604915201663971, "step": 1308 }, { "epoch": 3.5838466803559204, "grad_norm": 2.6704134941101074, "learning_rate": 8.206849315068493e-07, "log_odds_chosen": 1.9939124584197998, "log_odds_ratio": -0.28564685583114624, "logits/chosen": 0.8456396460533142, "logits/rejected": 0.8777058124542236, "logps/chosen": -2.3830628395080566, "logps/rejected": -4.292128562927246, "loss": 0.8611, "nll_loss": 0.832514762878418, "rewards/accuracies": 1.0, "rewards/chosen": -0.23830628395080566, "rewards/margins": 0.1909065991640091, "rewards/rejected": -0.42921286821365356, "step": 1309 }, { "epoch": 3.586584531143053, "grad_norm": 2.3698537349700928, "learning_rate": 8.205479452054795e-07, "log_odds_chosen": 0.5278069972991943, "log_odds_ratio": -0.6672852039337158, "logits/chosen": 0.6826924085617065, "logits/rejected": 0.6998278498649597, "logps/chosen": -2.273768663406372, "logps/rejected": -2.8104779720306396, "loss": 0.8682, "nll_loss": 0.8015108704566956, "rewards/accuracies": 0.625, "rewards/chosen": -0.22737686336040497, "rewards/margins": 0.05367092788219452, "rewards/rejected": -0.2810477912425995, "step": 1310 }, { "epoch": 3.589322381930185, "grad_norm": 4.191049098968506, "learning_rate": 8.204109589041096e-07, "log_odds_chosen": 0.38042911887168884, "log_odds_ratio": -0.6774453520774841, "logits/chosen": 0.7826336026191711, "logits/rejected": 0.7401716113090515, "logps/chosen": -2.4597532749176025, "logps/rejected": -2.745823860168457, "loss": 0.8502, "nll_loss": 0.7824501395225525, "rewards/accuracies": 0.625, "rewards/chosen": -0.2459753304719925, "rewards/margins": 0.028607048094272614, "rewards/rejected": -0.2745823860168457, "step": 1311 }, { "epoch": 3.592060232717317, "grad_norm": 2.6351377964019775, "learning_rate": 8.202739726027397e-07, "log_odds_chosen": 1.345003366470337, "log_odds_ratio": -0.3959560692310333, "logits/chosen": 0.6412199139595032, "logits/rejected": 0.5859937071800232, "logps/chosen": -1.7097827196121216, "logps/rejected": -2.9431731700897217, "loss": 0.9108, "nll_loss": 0.8711977005004883, "rewards/accuracies": 0.75, "rewards/chosen": -0.17097826302051544, "rewards/margins": 0.12333901971578598, "rewards/rejected": -0.2943173050880432, "step": 1312 }, { "epoch": 3.594798083504449, "grad_norm": 3.7458090782165527, "learning_rate": 8.201369863013699e-07, "log_odds_chosen": 0.8771679997444153, "log_odds_ratio": -0.7469844818115234, "logits/chosen": 0.8972948789596558, "logits/rejected": 0.8079447746276855, "logps/chosen": -2.6153626441955566, "logps/rejected": -3.3876852989196777, "loss": 0.8996, "nll_loss": 0.824949324131012, "rewards/accuracies": 0.75, "rewards/chosen": -0.26153627038002014, "rewards/margins": 0.0772322490811348, "rewards/rejected": -0.33876851201057434, "step": 1313 }, { "epoch": 3.597535934291581, "grad_norm": 3.087890863418579, "learning_rate": 8.199999999999999e-07, "log_odds_chosen": 1.0249072313308716, "log_odds_ratio": -0.6288739442825317, "logits/chosen": 0.9580212831497192, "logits/rejected": 0.9396730661392212, "logps/chosen": -3.224397659301758, "logps/rejected": -4.181240081787109, "loss": 0.8513, "nll_loss": 0.7884604930877686, "rewards/accuracies": 0.625, "rewards/chosen": -0.3224397897720337, "rewards/margins": 0.09568418562412262, "rewards/rejected": -0.4181239604949951, "step": 1314 }, { "epoch": 3.600273785078713, "grad_norm": 4.014603614807129, "learning_rate": 8.198630136986301e-07, "log_odds_chosen": 0.5889814496040344, "log_odds_ratio": -0.7267225980758667, "logits/chosen": 0.8496195077896118, "logits/rejected": 0.8159669637680054, "logps/chosen": -2.4669713973999023, "logps/rejected": -2.949331283569336, "loss": 0.8751, "nll_loss": 0.8023922443389893, "rewards/accuracies": 0.75, "rewards/chosen": -0.24669714272022247, "rewards/margins": 0.048235952854156494, "rewards/rejected": -0.29493311047554016, "step": 1315 }, { "epoch": 3.6030116358658453, "grad_norm": 1.9007540941238403, "learning_rate": 8.197260273972603e-07, "log_odds_chosen": 1.6556450128555298, "log_odds_ratio": -0.3756255805492401, "logits/chosen": 0.9867030382156372, "logits/rejected": 1.0074951648712158, "logps/chosen": -1.7461011409759521, "logps/rejected": -3.2161693572998047, "loss": 0.7719, "nll_loss": 0.73436439037323, "rewards/accuracies": 0.875, "rewards/chosen": -0.17461013793945312, "rewards/margins": 0.1470068097114563, "rewards/rejected": -0.32161691784858704, "step": 1316 }, { "epoch": 3.6057494866529773, "grad_norm": 2.3864822387695312, "learning_rate": 8.195890410958903e-07, "log_odds_chosen": 1.261733055114746, "log_odds_ratio": -0.5263568758964539, "logits/chosen": 0.7120022773742676, "logits/rejected": 0.682619035243988, "logps/chosen": -2.1811628341674805, "logps/rejected": -3.3897101879119873, "loss": 0.8889, "nll_loss": 0.8362593650817871, "rewards/accuracies": 0.625, "rewards/chosen": -0.21811626851558685, "rewards/margins": 0.12085475027561188, "rewards/rejected": -0.3389710485935211, "step": 1317 }, { "epoch": 3.6084873374401094, "grad_norm": 3.8533666133880615, "learning_rate": 8.194520547945205e-07, "log_odds_chosen": 0.6335099339485168, "log_odds_ratio": -0.5410783290863037, "logits/chosen": 1.0468943119049072, "logits/rejected": 1.0718028545379639, "logps/chosen": -2.9885470867156982, "logps/rejected": -3.5801167488098145, "loss": 0.8544, "nll_loss": 0.8003319501876831, "rewards/accuracies": 0.875, "rewards/chosen": -0.2988547086715698, "rewards/margins": 0.05915696173906326, "rewards/rejected": -0.3580116629600525, "step": 1318 }, { "epoch": 3.6112251882272415, "grad_norm": 1.960870623588562, "learning_rate": 8.193150684931507e-07, "log_odds_chosen": 2.3785183429718018, "log_odds_ratio": -0.2249668836593628, "logits/chosen": 0.6516073942184448, "logits/rejected": 0.6658798456192017, "logps/chosen": -1.2958576679229736, "logps/rejected": -3.3609538078308105, "loss": 0.7987, "nll_loss": 0.776192307472229, "rewards/accuracies": 1.0, "rewards/chosen": -0.12958577275276184, "rewards/margins": 0.20650959014892578, "rewards/rejected": -0.3360953629016876, "step": 1319 }, { "epoch": 3.613963039014374, "grad_norm": 2.1465981006622314, "learning_rate": 8.191780821917808e-07, "log_odds_chosen": 1.3878271579742432, "log_odds_ratio": -0.3658261001110077, "logits/chosen": 0.7148299217224121, "logits/rejected": 0.6492279767990112, "logps/chosen": -1.9105521440505981, "logps/rejected": -3.1664648056030273, "loss": 0.9367, "nll_loss": 0.9001445770263672, "rewards/accuracies": 1.0, "rewards/chosen": -0.19105522334575653, "rewards/margins": 0.1255912482738495, "rewards/rejected": -0.3166464567184448, "step": 1320 }, { "epoch": 3.616700889801506, "grad_norm": 2.2669436931610107, "learning_rate": 8.190410958904109e-07, "log_odds_chosen": 1.3780479431152344, "log_odds_ratio": -0.3099329173564911, "logits/chosen": 0.773127019405365, "logits/rejected": 0.7197114825248718, "logps/chosen": -2.4143948554992676, "logps/rejected": -3.717383623123169, "loss": 0.7863, "nll_loss": 0.7552933096885681, "rewards/accuracies": 0.875, "rewards/chosen": -0.24143949151039124, "rewards/margins": 0.13029888272285461, "rewards/rejected": -0.37173837423324585, "step": 1321 }, { "epoch": 3.619438740588638, "grad_norm": 2.6347649097442627, "learning_rate": 8.189041095890411e-07, "log_odds_chosen": 1.5022172927856445, "log_odds_ratio": -0.31951749324798584, "logits/chosen": 0.9439678192138672, "logits/rejected": 0.9632889628410339, "logps/chosen": -1.8736079931259155, "logps/rejected": -3.117065668106079, "loss": 0.7629, "nll_loss": 0.7309862375259399, "rewards/accuracies": 0.875, "rewards/chosen": -0.18736079335212708, "rewards/margins": 0.12434578686952591, "rewards/rejected": -0.3117066025733948, "step": 1322 }, { "epoch": 3.62217659137577, "grad_norm": 2.055846691131592, "learning_rate": 8.187671232876712e-07, "log_odds_chosen": 2.1534790992736816, "log_odds_ratio": -0.27144670486450195, "logits/chosen": 0.9616314172744751, "logits/rejected": 0.9666619300842285, "logps/chosen": -2.1338608264923096, "logps/rejected": -4.164782524108887, "loss": 0.7851, "nll_loss": 0.7579072117805481, "rewards/accuracies": 0.875, "rewards/chosen": -0.21338608860969543, "rewards/margins": 0.20309215784072876, "rewards/rejected": -0.4164782464504242, "step": 1323 }, { "epoch": 3.6249144421629023, "grad_norm": 2.5218310356140137, "learning_rate": 8.186301369863013e-07, "log_odds_chosen": 1.1871248483657837, "log_odds_ratio": -0.5094224214553833, "logits/chosen": 0.5844836831092834, "logits/rejected": 0.5611062049865723, "logps/chosen": -2.5212528705596924, "logps/rejected": -3.4652504920959473, "loss": 0.8625, "nll_loss": 0.811593234539032, "rewards/accuracies": 0.75, "rewards/chosen": -0.2521253228187561, "rewards/margins": 0.09439975023269653, "rewards/rejected": -0.34652507305145264, "step": 1324 }, { "epoch": 3.6276522929500343, "grad_norm": 2.1881814002990723, "learning_rate": 8.184931506849315e-07, "log_odds_chosen": 1.2751851081848145, "log_odds_ratio": -0.35038256645202637, "logits/chosen": 0.6350479125976562, "logits/rejected": 0.6121020913124084, "logps/chosen": -1.6176846027374268, "logps/rejected": -2.775149345397949, "loss": 0.8541, "nll_loss": 0.8190920352935791, "rewards/accuracies": 0.875, "rewards/chosen": -0.16176846623420715, "rewards/margins": 0.11574646085500717, "rewards/rejected": -0.2775149345397949, "step": 1325 }, { "epoch": 3.6303901437371664, "grad_norm": 2.4999284744262695, "learning_rate": 8.183561643835616e-07, "log_odds_chosen": 1.0879461765289307, "log_odds_ratio": -0.3312076926231384, "logits/chosen": 0.8899214267730713, "logits/rejected": 0.8514860272407532, "logps/chosen": -1.9605023860931396, "logps/rejected": -2.9603641033172607, "loss": 0.9018, "nll_loss": 0.8686801195144653, "rewards/accuracies": 1.0, "rewards/chosen": -0.1960502415895462, "rewards/margins": 0.09998616576194763, "rewards/rejected": -0.29603642225265503, "step": 1326 }, { "epoch": 3.6331279945242985, "grad_norm": 2.5638608932495117, "learning_rate": 8.182191780821918e-07, "log_odds_chosen": 0.4672278165817261, "log_odds_ratio": -0.5534375905990601, "logits/chosen": 0.6490907073020935, "logits/rejected": 0.6222102046012878, "logps/chosen": -2.073500871658325, "logps/rejected": -2.4730305671691895, "loss": 0.9757, "nll_loss": 0.9203310012817383, "rewards/accuracies": 0.75, "rewards/chosen": -0.20735007524490356, "rewards/margins": 0.03995298594236374, "rewards/rejected": -0.2473030686378479, "step": 1327 }, { "epoch": 3.6358658453114305, "grad_norm": 1.8874326944351196, "learning_rate": 8.180821917808218e-07, "log_odds_chosen": 2.11592435836792, "log_odds_ratio": -0.22650763392448425, "logits/chosen": 0.6899722814559937, "logits/rejected": 0.6692426800727844, "logps/chosen": -1.7157498598098755, "logps/rejected": -3.689113140106201, "loss": 0.7598, "nll_loss": 0.7371256351470947, "rewards/accuracies": 1.0, "rewards/chosen": -0.17157498002052307, "rewards/margins": 0.1973363161087036, "rewards/rejected": -0.3689112663269043, "step": 1328 }, { "epoch": 3.6386036960985626, "grad_norm": 2.288517713546753, "learning_rate": 8.17945205479452e-07, "log_odds_chosen": 1.7327572107315063, "log_odds_ratio": -0.3672385811805725, "logits/chosen": 0.8749701976776123, "logits/rejected": 0.9261814951896667, "logps/chosen": -2.171388626098633, "logps/rejected": -3.780278444290161, "loss": 0.8328, "nll_loss": 0.7960976362228394, "rewards/accuracies": 0.75, "rewards/chosen": -0.21713887155056, "rewards/margins": 0.16088899970054626, "rewards/rejected": -0.37802785634994507, "step": 1329 }, { "epoch": 3.6413415468856947, "grad_norm": 2.256246566772461, "learning_rate": 8.178082191780822e-07, "log_odds_chosen": 1.2349870204925537, "log_odds_ratio": -0.34667694568634033, "logits/chosen": 0.7164132595062256, "logits/rejected": 0.6731963157653809, "logps/chosen": -2.0164222717285156, "logps/rejected": -3.1454639434814453, "loss": 0.9219, "nll_loss": 0.8872107863426208, "rewards/accuracies": 1.0, "rewards/chosen": -0.201642245054245, "rewards/margins": 0.1129041463136673, "rewards/rejected": -0.3145463764667511, "step": 1330 }, { "epoch": 3.6440793976728267, "grad_norm": 3.5259735584259033, "learning_rate": 8.176712328767122e-07, "log_odds_chosen": -0.009562063962221146, "log_odds_ratio": -0.7138275504112244, "logits/chosen": 0.5024605989456177, "logits/rejected": 0.4553811550140381, "logps/chosen": -2.6158509254455566, "logps/rejected": -2.5965654850006104, "loss": 0.9774, "nll_loss": 0.9060567617416382, "rewards/accuracies": 0.5, "rewards/chosen": -0.2615850865840912, "rewards/margins": -0.001928526908159256, "rewards/rejected": -0.2596565783023834, "step": 1331 }, { "epoch": 3.646817248459959, "grad_norm": 3.054109811782837, "learning_rate": 8.175342465753424e-07, "log_odds_chosen": 1.9194871187210083, "log_odds_ratio": -0.3104850947856903, "logits/chosen": 0.6772080659866333, "logits/rejected": 0.6737880706787109, "logps/chosen": -2.640423536300659, "logps/rejected": -4.46307373046875, "loss": 0.9232, "nll_loss": 0.8921846151351929, "rewards/accuracies": 0.875, "rewards/chosen": -0.26404234766960144, "rewards/margins": 0.18226505815982819, "rewards/rejected": -0.44630739092826843, "step": 1332 }, { "epoch": 3.649555099247091, "grad_norm": 2.2077817916870117, "learning_rate": 8.173972602739726e-07, "log_odds_chosen": 2.0624382495880127, "log_odds_ratio": -0.18794429302215576, "logits/chosen": 0.9083560109138489, "logits/rejected": 0.9255533814430237, "logps/chosen": -2.045243501663208, "logps/rejected": -3.9662957191467285, "loss": 0.7779, "nll_loss": 0.7591519355773926, "rewards/accuracies": 1.0, "rewards/chosen": -0.20452436804771423, "rewards/margins": 0.192105233669281, "rewards/rejected": -0.39662957191467285, "step": 1333 }, { "epoch": 3.652292950034223, "grad_norm": 3.506502389907837, "learning_rate": 8.172602739726027e-07, "log_odds_chosen": 0.3376651704311371, "log_odds_ratio": -0.6483545303344727, "logits/chosen": 0.6039657592773438, "logits/rejected": 0.5765583515167236, "logps/chosen": -2.2619330883026123, "logps/rejected": -2.5372824668884277, "loss": 1.0605, "nll_loss": 0.9956417679786682, "rewards/accuracies": 0.875, "rewards/chosen": -0.22619330883026123, "rewards/margins": 0.027534915134310722, "rewards/rejected": -0.2537282109260559, "step": 1334 }, { "epoch": 3.655030800821355, "grad_norm": 2.221362352371216, "learning_rate": 8.171232876712328e-07, "log_odds_chosen": 0.9544479846954346, "log_odds_ratio": -0.4214009940624237, "logits/chosen": 0.6050751209259033, "logits/rejected": 0.5593668818473816, "logps/chosen": -1.7955867052078247, "logps/rejected": -2.649976968765259, "loss": 0.8746, "nll_loss": 0.8324466943740845, "rewards/accuracies": 0.75, "rewards/chosen": -0.179558664560318, "rewards/margins": 0.0854390412569046, "rewards/rejected": -0.2649977207183838, "step": 1335 }, { "epoch": 3.657768651608487, "grad_norm": 2.082871198654175, "learning_rate": 8.16986301369863e-07, "log_odds_chosen": 1.6232116222381592, "log_odds_ratio": -0.36402684450149536, "logits/chosen": 0.7841426134109497, "logits/rejected": 0.8059374094009399, "logps/chosen": -1.6164798736572266, "logps/rejected": -3.123166084289551, "loss": 0.8065, "nll_loss": 0.7701132893562317, "rewards/accuracies": 0.875, "rewards/chosen": -0.1616479903459549, "rewards/margins": 0.15066863596439362, "rewards/rejected": -0.3123165965080261, "step": 1336 }, { "epoch": 3.6605065023956196, "grad_norm": 2.6725997924804688, "learning_rate": 8.168493150684931e-07, "log_odds_chosen": 1.350403904914856, "log_odds_ratio": -0.25665077567100525, "logits/chosen": 0.7610483765602112, "logits/rejected": 0.6871339082717896, "logps/chosen": -1.8213701248168945, "logps/rejected": -3.009676456451416, "loss": 0.8235, "nll_loss": 0.797788679599762, "rewards/accuracies": 1.0, "rewards/chosen": -0.18213701248168945, "rewards/margins": 0.11883062869310379, "rewards/rejected": -0.30096763372421265, "step": 1337 }, { "epoch": 3.6632443531827517, "grad_norm": 3.1258933544158936, "learning_rate": 8.167123287671232e-07, "log_odds_chosen": 0.8794423341751099, "log_odds_ratio": -0.5394546389579773, "logits/chosen": 0.9887106418609619, "logits/rejected": 0.967979907989502, "logps/chosen": -2.700282335281372, "logps/rejected": -3.479311466217041, "loss": 0.7226, "nll_loss": 0.6686151027679443, "rewards/accuracies": 0.875, "rewards/chosen": -0.2700282633304596, "rewards/margins": 0.0779029130935669, "rewards/rejected": -0.3479311764240265, "step": 1338 }, { "epoch": 3.6659822039698837, "grad_norm": 2.4087746143341064, "learning_rate": 8.165753424657534e-07, "log_odds_chosen": 0.8572689294815063, "log_odds_ratio": -0.4491696357727051, "logits/chosen": 0.8762065172195435, "logits/rejected": 0.8282966613769531, "logps/chosen": -2.2409112453460693, "logps/rejected": -3.041151285171509, "loss": 0.8514, "nll_loss": 0.8064572811126709, "rewards/accuracies": 0.875, "rewards/chosen": -0.22409111261367798, "rewards/margins": 0.08002400398254395, "rewards/rejected": -0.3041151165962219, "step": 1339 }, { "epoch": 3.668720054757016, "grad_norm": 2.2041642665863037, "learning_rate": 8.164383561643835e-07, "log_odds_chosen": 1.9607421159744263, "log_odds_ratio": -0.2445380985736847, "logits/chosen": 0.8675950169563293, "logits/rejected": 0.8103243112564087, "logps/chosen": -2.372021436691284, "logps/rejected": -4.226287841796875, "loss": 0.8284, "nll_loss": 0.8039184808731079, "rewards/accuracies": 1.0, "rewards/chosen": -0.23720215260982513, "rewards/margins": 0.18542668223381042, "rewards/rejected": -0.42262884974479675, "step": 1340 }, { "epoch": 3.671457905544148, "grad_norm": 2.4142868518829346, "learning_rate": 8.163013698630137e-07, "log_odds_chosen": 1.6755807399749756, "log_odds_ratio": -0.4366362690925598, "logits/chosen": 0.8436853289604187, "logits/rejected": 0.7482005953788757, "logps/chosen": -1.6173752546310425, "logps/rejected": -3.1961255073547363, "loss": 0.8995, "nll_loss": 0.8558209538459778, "rewards/accuracies": 0.75, "rewards/chosen": -0.16173753142356873, "rewards/margins": 0.15787501633167267, "rewards/rejected": -0.3196125626564026, "step": 1341 }, { "epoch": 3.67419575633128, "grad_norm": 2.407686471939087, "learning_rate": 8.161643835616437e-07, "log_odds_chosen": 1.4284144639968872, "log_odds_ratio": -0.3599366545677185, "logits/chosen": 0.4749172329902649, "logits/rejected": 0.38689640164375305, "logps/chosen": -1.8473213911056519, "logps/rejected": -3.0744659900665283, "loss": 0.853, "nll_loss": 0.8170058727264404, "rewards/accuracies": 0.875, "rewards/chosen": -0.18473213911056519, "rewards/margins": 0.12271444499492645, "rewards/rejected": -0.30744659900665283, "step": 1342 }, { "epoch": 3.676933607118412, "grad_norm": 3.2090513706207275, "learning_rate": 8.160273972602739e-07, "log_odds_chosen": 1.8215442895889282, "log_odds_ratio": -0.519601047039032, "logits/chosen": 0.7304407954216003, "logits/rejected": 0.7039045691490173, "logps/chosen": -2.421623706817627, "logps/rejected": -4.143192768096924, "loss": 0.8698, "nll_loss": 0.8178470134735107, "rewards/accuracies": 0.75, "rewards/chosen": -0.24216236174106598, "rewards/margins": 0.17215695977210999, "rewards/rejected": -0.41431930661201477, "step": 1343 }, { "epoch": 3.679671457905544, "grad_norm": 2.5981814861297607, "learning_rate": 8.158904109589041e-07, "log_odds_chosen": 0.6670018434524536, "log_odds_ratio": -0.4258907437324524, "logits/chosen": 0.6248947978019714, "logits/rejected": 0.49191439151763916, "logps/chosen": -1.5533812046051025, "logps/rejected": -2.080113649368286, "loss": 0.9174, "nll_loss": 0.8747948408126831, "rewards/accuracies": 1.0, "rewards/chosen": -0.1553381383419037, "rewards/margins": 0.05267325043678284, "rewards/rejected": -0.20801138877868652, "step": 1344 }, { "epoch": 3.682409308692676, "grad_norm": 2.319941520690918, "learning_rate": 8.157534246575341e-07, "log_odds_chosen": 2.2124433517456055, "log_odds_ratio": -0.269287109375, "logits/chosen": 0.6177331805229187, "logits/rejected": 0.591313898563385, "logps/chosen": -1.7378480434417725, "logps/rejected": -3.7865824699401855, "loss": 0.8547, "nll_loss": 0.8277671337127686, "rewards/accuracies": 0.875, "rewards/chosen": -0.17378482222557068, "rewards/margins": 0.2048734426498413, "rewards/rejected": -0.378658264875412, "step": 1345 }, { "epoch": 3.685147159479808, "grad_norm": 2.7649340629577637, "learning_rate": 8.156164383561643e-07, "log_odds_chosen": 2.0230822563171387, "log_odds_ratio": -0.2617809474468231, "logits/chosen": 1.1091116666793823, "logits/rejected": 1.1026124954223633, "logps/chosen": -2.482470989227295, "logps/rejected": -4.409730911254883, "loss": 0.7409, "nll_loss": 0.7147334814071655, "rewards/accuracies": 1.0, "rewards/chosen": -0.24824710190296173, "rewards/margins": 0.1927260160446167, "rewards/rejected": -0.44097310304641724, "step": 1346 }, { "epoch": 3.6878850102669407, "grad_norm": 2.0976452827453613, "learning_rate": 8.154794520547945e-07, "log_odds_chosen": 1.5832328796386719, "log_odds_ratio": -0.2655344307422638, "logits/chosen": 0.7259552478790283, "logits/rejected": 0.6930973529815674, "logps/chosen": -1.7453360557556152, "logps/rejected": -3.1343469619750977, "loss": 0.7569, "nll_loss": 0.7303389310836792, "rewards/accuracies": 0.875, "rewards/chosen": -0.17453360557556152, "rewards/margins": 0.13890111446380615, "rewards/rejected": -0.3134347200393677, "step": 1347 }, { "epoch": 3.690622861054073, "grad_norm": 2.5104520320892334, "learning_rate": 8.153424657534246e-07, "log_odds_chosen": 0.49737340211868286, "log_odds_ratio": -0.5132155418395996, "logits/chosen": 0.6974083185195923, "logits/rejected": 0.6899108290672302, "logps/chosen": -2.376372814178467, "logps/rejected": -2.8320937156677246, "loss": 0.8896, "nll_loss": 0.838323712348938, "rewards/accuracies": 0.75, "rewards/chosen": -0.23763728141784668, "rewards/margins": 0.045572079718112946, "rewards/rejected": -0.2832093834877014, "step": 1348 }, { "epoch": 3.693360711841205, "grad_norm": 2.8616867065429688, "learning_rate": 8.152054794520547e-07, "log_odds_chosen": 1.6637240648269653, "log_odds_ratio": -0.4509570896625519, "logits/chosen": 0.9692530035972595, "logits/rejected": 0.9877185821533203, "logps/chosen": -2.397968292236328, "logps/rejected": -4.0234270095825195, "loss": 0.8623, "nll_loss": 0.8171901702880859, "rewards/accuracies": 0.75, "rewards/chosen": -0.23979683220386505, "rewards/margins": 0.16254590451717377, "rewards/rejected": -0.40234270691871643, "step": 1349 }, { "epoch": 3.696098562628337, "grad_norm": 2.5326578617095947, "learning_rate": 8.150684931506849e-07, "log_odds_chosen": 0.5083693265914917, "log_odds_ratio": -0.49251681566238403, "logits/chosen": 0.7592209577560425, "logits/rejected": 0.7657415270805359, "logps/chosen": -1.5080890655517578, "logps/rejected": -1.9235560894012451, "loss": 0.8045, "nll_loss": 0.7551994323730469, "rewards/accuracies": 0.75, "rewards/chosen": -0.1508089154958725, "rewards/margins": 0.04154668375849724, "rewards/rejected": -0.19235560297966003, "step": 1350 }, { "epoch": 3.698836413415469, "grad_norm": 3.1450867652893066, "learning_rate": 8.14931506849315e-07, "log_odds_chosen": 1.3348889350891113, "log_odds_ratio": -0.5387598276138306, "logits/chosen": 0.8320509791374207, "logits/rejected": 0.8226557374000549, "logps/chosen": -2.3746285438537598, "logps/rejected": -3.64139461517334, "loss": 0.9261, "nll_loss": 0.8722625374794006, "rewards/accuracies": 0.625, "rewards/chosen": -0.2374628484249115, "rewards/margins": 0.12667660415172577, "rewards/rejected": -0.3641394376754761, "step": 1351 }, { "epoch": 3.701574264202601, "grad_norm": 2.402174949645996, "learning_rate": 8.147945205479451e-07, "log_odds_chosen": 0.9596340656280518, "log_odds_ratio": -0.4057536721229553, "logits/chosen": 0.471774697303772, "logits/rejected": 0.4052160680294037, "logps/chosen": -1.712186574935913, "logps/rejected": -2.566195487976074, "loss": 0.8249, "nll_loss": 0.7843303680419922, "rewards/accuracies": 0.875, "rewards/chosen": -0.1712186634540558, "rewards/margins": 0.08540090918540955, "rewards/rejected": -0.25661957263946533, "step": 1352 }, { "epoch": 3.704312114989733, "grad_norm": 2.301771640777588, "learning_rate": 8.146575342465753e-07, "log_odds_chosen": 0.7316854000091553, "log_odds_ratio": -0.41982153058052063, "logits/chosen": 0.6658150553703308, "logits/rejected": 0.5643457770347595, "logps/chosen": -1.4020206928253174, "logps/rejected": -1.9989047050476074, "loss": 0.8563, "nll_loss": 0.8143575191497803, "rewards/accuracies": 0.875, "rewards/chosen": -0.14020206034183502, "rewards/margins": 0.05968841537833214, "rewards/rejected": -0.19989047944545746, "step": 1353 }, { "epoch": 3.707049965776865, "grad_norm": 2.1103274822235107, "learning_rate": 8.145205479452054e-07, "log_odds_chosen": 1.284555435180664, "log_odds_ratio": -0.36309459805488586, "logits/chosen": 0.8773530721664429, "logits/rejected": 0.7961647510528564, "logps/chosen": -2.1910345554351807, "logps/rejected": -3.4098305702209473, "loss": 0.873, "nll_loss": 0.8366434574127197, "rewards/accuracies": 0.75, "rewards/chosen": -0.21910344064235687, "rewards/margins": 0.12187959253787994, "rewards/rejected": -0.3409830331802368, "step": 1354 }, { "epoch": 3.7097878165639973, "grad_norm": 2.195485830307007, "learning_rate": 8.143835616438356e-07, "log_odds_chosen": 1.3056609630584717, "log_odds_ratio": -0.31092479825019836, "logits/chosen": 0.9357938170433044, "logits/rejected": 0.9170336723327637, "logps/chosen": -1.6389334201812744, "logps/rejected": -2.822080135345459, "loss": 0.7474, "nll_loss": 0.7163316011428833, "rewards/accuracies": 1.0, "rewards/chosen": -0.16389334201812744, "rewards/margins": 0.1183146983385086, "rewards/rejected": -0.28220805525779724, "step": 1355 }, { "epoch": 3.7125256673511293, "grad_norm": 2.840036392211914, "learning_rate": 8.142465753424657e-07, "log_odds_chosen": 1.8392343521118164, "log_odds_ratio": -0.30471402406692505, "logits/chosen": 0.908064603805542, "logits/rejected": 0.9455490112304688, "logps/chosen": -1.6888693571090698, "logps/rejected": -3.273641586303711, "loss": 0.812, "nll_loss": 0.7814912796020508, "rewards/accuracies": 0.875, "rewards/chosen": -0.1688869297504425, "rewards/margins": 0.15847723186016083, "rewards/rejected": -0.32736414670944214, "step": 1356 }, { "epoch": 3.7152635181382614, "grad_norm": 2.0658862590789795, "learning_rate": 8.141095890410958e-07, "log_odds_chosen": 2.581998109817505, "log_odds_ratio": -0.13387545943260193, "logits/chosen": 0.9494086503982544, "logits/rejected": 0.9521723985671997, "logps/chosen": -2.0923595428466797, "logps/rejected": -4.485339164733887, "loss": 0.8026, "nll_loss": 0.7892055511474609, "rewards/accuracies": 1.0, "rewards/chosen": -0.20923596620559692, "rewards/margins": 0.23929795622825623, "rewards/rejected": -0.44853392243385315, "step": 1357 }, { "epoch": 3.7180013689253935, "grad_norm": 3.6040141582489014, "learning_rate": 8.13972602739726e-07, "log_odds_chosen": 0.8345486521720886, "log_odds_ratio": -0.6504771709442139, "logits/chosen": 1.0733096599578857, "logits/rejected": 1.1375566720962524, "logps/chosen": -3.435530662536621, "logps/rejected": -4.244300842285156, "loss": 0.8159, "nll_loss": 0.7508546113967896, "rewards/accuracies": 0.625, "rewards/chosen": -0.3435530662536621, "rewards/margins": 0.08087701350450516, "rewards/rejected": -0.42443007230758667, "step": 1358 }, { "epoch": 3.7207392197125255, "grad_norm": 2.152804136276245, "learning_rate": 8.13835616438356e-07, "log_odds_chosen": 1.3788930177688599, "log_odds_ratio": -0.3048133850097656, "logits/chosen": 0.629775881767273, "logits/rejected": 0.5151824951171875, "logps/chosen": -1.394975185394287, "logps/rejected": -2.537466287612915, "loss": 0.8869, "nll_loss": 0.8564345836639404, "rewards/accuracies": 1.0, "rewards/chosen": -0.1394975185394287, "rewards/margins": 0.114249087870121, "rewards/rejected": -0.2537466287612915, "step": 1359 }, { "epoch": 3.7234770704996576, "grad_norm": 1.9872463941574097, "learning_rate": 8.136986301369862e-07, "log_odds_chosen": 1.5384986400604248, "log_odds_ratio": -0.2470412403345108, "logits/chosen": 0.7285274863243103, "logits/rejected": 0.7073729038238525, "logps/chosen": -1.65470290184021, "logps/rejected": -2.996129035949707, "loss": 0.8605, "nll_loss": 0.83575040102005, "rewards/accuracies": 1.0, "rewards/chosen": -0.16547030210494995, "rewards/margins": 0.13414260745048523, "rewards/rejected": -0.2996129095554352, "step": 1360 }, { "epoch": 3.7262149212867897, "grad_norm": 2.292861223220825, "learning_rate": 8.135616438356164e-07, "log_odds_chosen": 1.2442853450775146, "log_odds_ratio": -0.39043253660202026, "logits/chosen": 0.6686471104621887, "logits/rejected": 0.610927939414978, "logps/chosen": -2.3685097694396973, "logps/rejected": -3.5355637073516846, "loss": 0.8575, "nll_loss": 0.8184323310852051, "rewards/accuracies": 0.875, "rewards/chosen": -0.23685099184513092, "rewards/margins": 0.11670538783073425, "rewards/rejected": -0.35355639457702637, "step": 1361 }, { "epoch": 3.7289527720739217, "grad_norm": 3.092848300933838, "learning_rate": 8.134246575342465e-07, "log_odds_chosen": 0.6518845558166504, "log_odds_ratio": -0.6783338785171509, "logits/chosen": 0.7267903089523315, "logits/rejected": 0.7684223651885986, "logps/chosen": -2.72957444190979, "logps/rejected": -3.374403238296509, "loss": 0.9325, "nll_loss": 0.864694356918335, "rewards/accuracies": 0.5, "rewards/chosen": -0.272957444190979, "rewards/margins": 0.06448287516832352, "rewards/rejected": -0.3374403119087219, "step": 1362 }, { "epoch": 3.731690622861054, "grad_norm": 5.639305114746094, "learning_rate": 8.132876712328766e-07, "log_odds_chosen": 1.9250195026397705, "log_odds_ratio": -0.45656630396842957, "logits/chosen": 1.0393990278244019, "logits/rejected": 1.0894397497177124, "logps/chosen": -2.0454907417297363, "logps/rejected": -3.7877871990203857, "loss": 0.8643, "nll_loss": 0.8186748027801514, "rewards/accuracies": 0.75, "rewards/chosen": -0.20454907417297363, "rewards/margins": 0.17422965168952942, "rewards/rejected": -0.37877875566482544, "step": 1363 }, { "epoch": 3.7344284736481863, "grad_norm": 4.254374027252197, "learning_rate": 8.131506849315068e-07, "log_odds_chosen": 1.9310301542282104, "log_odds_ratio": -0.5172951221466064, "logits/chosen": 1.0397515296936035, "logits/rejected": 1.0400149822235107, "logps/chosen": -2.297384023666382, "logps/rejected": -4.0888776779174805, "loss": 0.8168, "nll_loss": 0.7651137113571167, "rewards/accuracies": 0.75, "rewards/chosen": -0.22973838448524475, "rewards/margins": 0.17914941906929016, "rewards/rejected": -0.4088878035545349, "step": 1364 }, { "epoch": 3.7371663244353184, "grad_norm": 3.603909730911255, "learning_rate": 8.130136986301369e-07, "log_odds_chosen": 0.13389232754707336, "log_odds_ratio": -0.8761551380157471, "logits/chosen": 0.742438793182373, "logits/rejected": 0.7807435989379883, "logps/chosen": -3.0809879302978516, "logps/rejected": -3.217947483062744, "loss": 0.8675, "nll_loss": 0.7799063324928284, "rewards/accuracies": 0.5, "rewards/chosen": -0.30809879302978516, "rewards/margins": 0.013695936650037766, "rewards/rejected": -0.3217947483062744, "step": 1365 }, { "epoch": 3.7399041752224504, "grad_norm": 2.1605987548828125, "learning_rate": 8.12876712328767e-07, "log_odds_chosen": 1.035808801651001, "log_odds_ratio": -0.37911808490753174, "logits/chosen": 0.7803268432617188, "logits/rejected": 0.6796610951423645, "logps/chosen": -2.198935031890869, "logps/rejected": -3.1637706756591797, "loss": 0.8095, "nll_loss": 0.7716146111488342, "rewards/accuracies": 0.75, "rewards/chosen": -0.21989348530769348, "rewards/margins": 0.09648359566926956, "rewards/rejected": -0.31637707352638245, "step": 1366 }, { "epoch": 3.7426420260095825, "grad_norm": 2.7596986293792725, "learning_rate": 8.127397260273973e-07, "log_odds_chosen": 0.5969764590263367, "log_odds_ratio": -0.5498230457305908, "logits/chosen": 0.6442337036132812, "logits/rejected": 0.5704637765884399, "logps/chosen": -1.6640501022338867, "logps/rejected": -2.1322665214538574, "loss": 0.889, "nll_loss": 0.8340542912483215, "rewards/accuracies": 0.75, "rewards/chosen": -0.16640502214431763, "rewards/margins": 0.04682164266705513, "rewards/rejected": -0.21322664618492126, "step": 1367 }, { "epoch": 3.7453798767967146, "grad_norm": 2.5906858444213867, "learning_rate": 8.126027397260273e-07, "log_odds_chosen": 0.7706989049911499, "log_odds_ratio": -0.5077695846557617, "logits/chosen": 0.5504945516586304, "logits/rejected": 0.5480431318283081, "logps/chosen": -1.7984044551849365, "logps/rejected": -2.513749361038208, "loss": 0.8933, "nll_loss": 0.8425478339195251, "rewards/accuracies": 0.75, "rewards/chosen": -0.17984044551849365, "rewards/margins": 0.07153449207544327, "rewards/rejected": -0.2513749599456787, "step": 1368 }, { "epoch": 3.7481177275838466, "grad_norm": 2.407503128051758, "learning_rate": 8.124657534246576e-07, "log_odds_chosen": 0.8481361865997314, "log_odds_ratio": -0.38034942746162415, "logits/chosen": 0.6702212691307068, "logits/rejected": 0.617940366268158, "logps/chosen": -1.8439158201217651, "logps/rejected": -2.570018768310547, "loss": 0.8544, "nll_loss": 0.8163273334503174, "rewards/accuracies": 1.0, "rewards/chosen": -0.184391587972641, "rewards/margins": 0.07261031121015549, "rewards/rejected": -0.2570019066333771, "step": 1369 }, { "epoch": 3.7508555783709787, "grad_norm": 2.029350996017456, "learning_rate": 8.123287671232877e-07, "log_odds_chosen": 1.7520534992218018, "log_odds_ratio": -0.31294959783554077, "logits/chosen": 0.9140067100524902, "logits/rejected": 0.9011227488517761, "logps/chosen": -1.7852386236190796, "logps/rejected": -3.301792860031128, "loss": 0.7872, "nll_loss": 0.7558648586273193, "rewards/accuracies": 0.875, "rewards/chosen": -0.17852386832237244, "rewards/margins": 0.1516554206609726, "rewards/rejected": -0.3301793038845062, "step": 1370 }, { "epoch": 3.753593429158111, "grad_norm": 2.3455121517181396, "learning_rate": 8.121917808219178e-07, "log_odds_chosen": 0.8381428718566895, "log_odds_ratio": -0.4200515151023865, "logits/chosen": 0.6312025785446167, "logits/rejected": 0.6159272193908691, "logps/chosen": -1.8882856369018555, "logps/rejected": -2.6265082359313965, "loss": 0.8967, "nll_loss": 0.8547028303146362, "rewards/accuracies": 0.75, "rewards/chosen": -0.18882855772972107, "rewards/margins": 0.0738222599029541, "rewards/rejected": -0.26265081763267517, "step": 1371 }, { "epoch": 3.756331279945243, "grad_norm": 2.6855664253234863, "learning_rate": 8.12054794520548e-07, "log_odds_chosen": 0.9598038196563721, "log_odds_ratio": -0.46318474411964417, "logits/chosen": 0.7126279473304749, "logits/rejected": 0.6628834009170532, "logps/chosen": -2.0550742149353027, "logps/rejected": -2.9599034786224365, "loss": 0.844, "nll_loss": 0.7976623177528381, "rewards/accuracies": 0.75, "rewards/chosen": -0.20550742745399475, "rewards/margins": 0.0904829353094101, "rewards/rejected": -0.29599034786224365, "step": 1372 }, { "epoch": 3.759069130732375, "grad_norm": 2.375040054321289, "learning_rate": 8.11917808219178e-07, "log_odds_chosen": 2.6487345695495605, "log_odds_ratio": -0.44545871019363403, "logits/chosen": 0.8283475041389465, "logits/rejected": 0.7486127614974976, "logps/chosen": -2.1006789207458496, "logps/rejected": -4.671649932861328, "loss": 0.8901, "nll_loss": 0.8455589413642883, "rewards/accuracies": 0.75, "rewards/chosen": -0.21006789803504944, "rewards/margins": 0.257097065448761, "rewards/rejected": -0.4671649634838104, "step": 1373 }, { "epoch": 3.7618069815195074, "grad_norm": 2.5949647426605225, "learning_rate": 8.117808219178082e-07, "log_odds_chosen": 1.3618359565734863, "log_odds_ratio": -0.348593533039093, "logits/chosen": 0.7850483059883118, "logits/rejected": 0.7422698140144348, "logps/chosen": -2.4811630249023438, "logps/rejected": -3.766116142272949, "loss": 0.845, "nll_loss": 0.810163140296936, "rewards/accuracies": 0.875, "rewards/chosen": -0.24811632931232452, "rewards/margins": 0.12849527597427368, "rewards/rejected": -0.376611590385437, "step": 1374 }, { "epoch": 3.7645448323066395, "grad_norm": 2.7061002254486084, "learning_rate": 8.116438356164384e-07, "log_odds_chosen": 2.3112411499023438, "log_odds_ratio": -0.3039892613887787, "logits/chosen": 0.7608320713043213, "logits/rejected": 0.7330641150474548, "logps/chosen": -2.0960707664489746, "logps/rejected": -4.255795955657959, "loss": 0.8786, "nll_loss": 0.8482033610343933, "rewards/accuracies": 0.875, "rewards/chosen": -0.2096070945262909, "rewards/margins": 0.21597249805927277, "rewards/rejected": -0.4255795478820801, "step": 1375 }, { "epoch": 3.7672826830937716, "grad_norm": 4.5482096672058105, "learning_rate": 8.115068493150685e-07, "log_odds_chosen": 0.573217511177063, "log_odds_ratio": -0.7252317667007446, "logits/chosen": 0.5912206768989563, "logits/rejected": 0.5144112706184387, "logps/chosen": -3.1522412300109863, "logps/rejected": -3.644702911376953, "loss": 0.986, "nll_loss": 0.9134900569915771, "rewards/accuracies": 0.625, "rewards/chosen": -0.3152241110801697, "rewards/margins": 0.049246177077293396, "rewards/rejected": -0.36447030305862427, "step": 1376 }, { "epoch": 3.7700205338809036, "grad_norm": 2.296480178833008, "learning_rate": 8.113698630136986e-07, "log_odds_chosen": 0.5756499767303467, "log_odds_ratio": -0.5985327959060669, "logits/chosen": 0.6947448253631592, "logits/rejected": 0.6142244338989258, "logps/chosen": -2.0733237266540527, "logps/rejected": -2.5690011978149414, "loss": 0.8814, "nll_loss": 0.821505069732666, "rewards/accuracies": 0.75, "rewards/chosen": -0.20733240246772766, "rewards/margins": 0.04956771805882454, "rewards/rejected": -0.2569001317024231, "step": 1377 }, { "epoch": 3.7727583846680357, "grad_norm": 2.508474111557007, "learning_rate": 8.112328767123288e-07, "log_odds_chosen": 1.8245818614959717, "log_odds_ratio": -0.2981966435909271, "logits/chosen": 1.1603105068206787, "logits/rejected": 1.156147837638855, "logps/chosen": -1.927765965461731, "logps/rejected": -3.5734424591064453, "loss": 0.7679, "nll_loss": 0.7380676865577698, "rewards/accuracies": 0.875, "rewards/chosen": -0.19277659058570862, "rewards/margins": 0.16456767916679382, "rewards/rejected": -0.35734426975250244, "step": 1378 }, { "epoch": 3.7754962354551678, "grad_norm": 2.632843494415283, "learning_rate": 8.110958904109589e-07, "log_odds_chosen": 1.5729858875274658, "log_odds_ratio": -0.22497013211250305, "logits/chosen": 0.8732958436012268, "logits/rejected": 0.8773931860923767, "logps/chosen": -2.536017417907715, "logps/rejected": -3.9525489807128906, "loss": 0.841, "nll_loss": 0.8184797763824463, "rewards/accuracies": 1.0, "rewards/chosen": -0.25360172986984253, "rewards/margins": 0.1416531354188919, "rewards/rejected": -0.39525488018989563, "step": 1379 }, { "epoch": 3.7782340862423, "grad_norm": 2.1568329334259033, "learning_rate": 8.10958904109589e-07, "log_odds_chosen": 1.533003568649292, "log_odds_ratio": -0.37466225028038025, "logits/chosen": 0.8192782402038574, "logits/rejected": 0.8110753297805786, "logps/chosen": -1.9370934963226318, "logps/rejected": -3.3772475719451904, "loss": 0.8839, "nll_loss": 0.8464536666870117, "rewards/accuracies": 1.0, "rewards/chosen": -0.1937093436717987, "rewards/margins": 0.14401541650295258, "rewards/rejected": -0.3377247452735901, "step": 1380 }, { "epoch": 3.780971937029432, "grad_norm": 2.376258611679077, "learning_rate": 8.108219178082192e-07, "log_odds_chosen": 1.553204894065857, "log_odds_ratio": -0.35688844323158264, "logits/chosen": 0.6721227765083313, "logits/rejected": 0.5437523126602173, "logps/chosen": -2.053896188735962, "logps/rejected": -3.473848342895508, "loss": 0.9169, "nll_loss": 0.8811813592910767, "rewards/accuracies": 0.75, "rewards/chosen": -0.2053896188735962, "rewards/margins": 0.14199520647525787, "rewards/rejected": -0.34738481044769287, "step": 1381 }, { "epoch": 3.783709787816564, "grad_norm": 2.0715866088867188, "learning_rate": 8.106849315068493e-07, "log_odds_chosen": 2.02691650390625, "log_odds_ratio": -0.23707042634487152, "logits/chosen": 0.49813079833984375, "logits/rejected": 0.4764802157878876, "logps/chosen": -1.7543693780899048, "logps/rejected": -3.5132949352264404, "loss": 0.8131, "nll_loss": 0.7893905639648438, "rewards/accuracies": 1.0, "rewards/chosen": -0.17543692886829376, "rewards/margins": 0.17589256167411804, "rewards/rejected": -0.351329505443573, "step": 1382 }, { "epoch": 3.786447638603696, "grad_norm": 2.761747121810913, "learning_rate": 8.105479452054795e-07, "log_odds_chosen": 0.6574488878250122, "log_odds_ratio": -0.513660192489624, "logits/chosen": 0.7770217061042786, "logits/rejected": 0.7142778635025024, "logps/chosen": -1.8165709972381592, "logps/rejected": -2.398569107055664, "loss": 0.8917, "nll_loss": 0.8403295278549194, "rewards/accuracies": 0.75, "rewards/chosen": -0.181657075881958, "rewards/margins": 0.05819982290267944, "rewards/rejected": -0.23985689878463745, "step": 1383 }, { "epoch": 3.789185489390828, "grad_norm": 2.3545918464660645, "learning_rate": 8.104109589041096e-07, "log_odds_chosen": 2.0977280139923096, "log_odds_ratio": -0.34608572721481323, "logits/chosen": 0.7256085872650146, "logits/rejected": 0.6783857345581055, "logps/chosen": -2.1438722610473633, "logps/rejected": -4.123875617980957, "loss": 0.9554, "nll_loss": 0.9207743406295776, "rewards/accuracies": 0.875, "rewards/chosen": -0.21438723802566528, "rewards/margins": 0.19800032675266266, "rewards/rejected": -0.41238757967948914, "step": 1384 }, { "epoch": 3.79192334017796, "grad_norm": 2.1216070652008057, "learning_rate": 8.102739726027397e-07, "log_odds_chosen": 1.076794147491455, "log_odds_ratio": -0.3693329691886902, "logits/chosen": 0.9670923948287964, "logits/rejected": 0.9261971712112427, "logps/chosen": -1.7305089235305786, "logps/rejected": -2.6616415977478027, "loss": 0.8555, "nll_loss": 0.8185542821884155, "rewards/accuracies": 1.0, "rewards/chosen": -0.1730509102344513, "rewards/margins": 0.09311327338218689, "rewards/rejected": -0.2661641538143158, "step": 1385 }, { "epoch": 3.7946611909650922, "grad_norm": 2.0717978477478027, "learning_rate": 8.101369863013699e-07, "log_odds_chosen": 0.9189177751541138, "log_odds_ratio": -0.405362069606781, "logits/chosen": 0.878962516784668, "logits/rejected": 0.8101530075073242, "logps/chosen": -2.0039587020874023, "logps/rejected": -2.8423714637756348, "loss": 0.842, "nll_loss": 0.8014527559280396, "rewards/accuracies": 1.0, "rewards/chosen": -0.200395867228508, "rewards/margins": 0.08384130150079727, "rewards/rejected": -0.2842371463775635, "step": 1386 }, { "epoch": 3.7973990417522243, "grad_norm": 3.9298810958862305, "learning_rate": 8.1e-07, "log_odds_chosen": 0.3847180902957916, "log_odds_ratio": -0.7954179048538208, "logits/chosen": 0.7074968814849854, "logits/rejected": 0.7148204445838928, "logps/chosen": -3.0193257331848145, "logps/rejected": -3.337286949157715, "loss": 0.9677, "nll_loss": 0.8881497979164124, "rewards/accuracies": 0.75, "rewards/chosen": -0.30193260312080383, "rewards/margins": 0.03179612755775452, "rewards/rejected": -0.33372870087623596, "step": 1387 }, { "epoch": 3.8001368925393564, "grad_norm": 1.8356037139892578, "learning_rate": 8.098630136986301e-07, "log_odds_chosen": 1.2323293685913086, "log_odds_ratio": -0.4205569326877594, "logits/chosen": 0.7121095657348633, "logits/rejected": 0.6616228222846985, "logps/chosen": -2.0304160118103027, "logps/rejected": -3.2072348594665527, "loss": 0.9228, "nll_loss": 0.8807170987129211, "rewards/accuracies": 0.75, "rewards/chosen": -0.20304161310195923, "rewards/margins": 0.11768186092376709, "rewards/rejected": -0.3207234740257263, "step": 1388 }, { "epoch": 3.8028747433264884, "grad_norm": 2.6010780334472656, "learning_rate": 8.097260273972603e-07, "log_odds_chosen": 1.000632405281067, "log_odds_ratio": -0.38594841957092285, "logits/chosen": 0.8685957789421082, "logits/rejected": 0.8406524658203125, "logps/chosen": -2.391270637512207, "logps/rejected": -3.294262409210205, "loss": 0.8946, "nll_loss": 0.8559565544128418, "rewards/accuracies": 0.75, "rewards/chosen": -0.23912706971168518, "rewards/margins": 0.09029921144247055, "rewards/rejected": -0.32942628860473633, "step": 1389 }, { "epoch": 3.805612594113621, "grad_norm": 2.498516798019409, "learning_rate": 8.095890410958903e-07, "log_odds_chosen": 0.8819225430488586, "log_odds_ratio": -0.42983129620552063, "logits/chosen": 0.7232663035392761, "logits/rejected": 0.6516329646110535, "logps/chosen": -1.9896104335784912, "logps/rejected": -2.73935604095459, "loss": 0.84, "nll_loss": 0.797054648399353, "rewards/accuracies": 0.875, "rewards/chosen": -0.1989610493183136, "rewards/margins": 0.07497458159923553, "rewards/rejected": -0.27393561601638794, "step": 1390 }, { "epoch": 3.808350444900753, "grad_norm": 2.583711624145508, "learning_rate": 8.094520547945205e-07, "log_odds_chosen": 1.5559704303741455, "log_odds_ratio": -0.24746479094028473, "logits/chosen": 1.0135716199874878, "logits/rejected": 0.9887793064117432, "logps/chosen": -2.2446675300598145, "logps/rejected": -3.6823346614837646, "loss": 0.7493, "nll_loss": 0.7245283126831055, "rewards/accuracies": 1.0, "rewards/chosen": -0.22446675598621368, "rewards/margins": 0.14376673102378845, "rewards/rejected": -0.36823350191116333, "step": 1391 }, { "epoch": 3.811088295687885, "grad_norm": 2.7998790740966797, "learning_rate": 8.093150684931507e-07, "log_odds_chosen": 0.41940736770629883, "log_odds_ratio": -0.6640036106109619, "logits/chosen": 0.9578389525413513, "logits/rejected": 0.9755409955978394, "logps/chosen": -2.1766037940979004, "logps/rejected": -2.5424067974090576, "loss": 0.8355, "nll_loss": 0.7690693140029907, "rewards/accuracies": 0.875, "rewards/chosen": -0.21766036748886108, "rewards/margins": 0.03658033534884453, "rewards/rejected": -0.2542407214641571, "step": 1392 }, { "epoch": 3.813826146475017, "grad_norm": 2.2769227027893066, "learning_rate": 8.091780821917808e-07, "log_odds_chosen": 1.5640300512313843, "log_odds_ratio": -0.2760784924030304, "logits/chosen": 1.1621166467666626, "logits/rejected": 1.184125542640686, "logps/chosen": -2.28293514251709, "logps/rejected": -3.7481236457824707, "loss": 0.7357, "nll_loss": 0.7081051468849182, "rewards/accuracies": 1.0, "rewards/chosen": -0.2282935082912445, "rewards/margins": 0.14651885628700256, "rewards/rejected": -0.37481236457824707, "step": 1393 }, { "epoch": 3.8165639972621492, "grad_norm": 3.4339606761932373, "learning_rate": 8.090410958904109e-07, "log_odds_chosen": 1.3761674165725708, "log_odds_ratio": -0.3395956754684448, "logits/chosen": 0.8940438628196716, "logits/rejected": 0.8025200366973877, "logps/chosen": -2.9573190212249756, "logps/rejected": -4.270823955535889, "loss": 0.9636, "nll_loss": 0.9296899437904358, "rewards/accuracies": 0.875, "rewards/chosen": -0.29573187232017517, "rewards/margins": 0.13135050237178802, "rewards/rejected": -0.427082359790802, "step": 1394 }, { "epoch": 3.8193018480492813, "grad_norm": 2.6298632621765137, "learning_rate": 8.089041095890411e-07, "log_odds_chosen": 1.1272211074829102, "log_odds_ratio": -0.45956504344940186, "logits/chosen": 0.9234606027603149, "logits/rejected": 0.9603347778320312, "logps/chosen": -2.3506317138671875, "logps/rejected": -3.3907876014709473, "loss": 0.7766, "nll_loss": 0.7306129932403564, "rewards/accuracies": 0.875, "rewards/chosen": -0.23506318032741547, "rewards/margins": 0.10401557385921478, "rewards/rejected": -0.33907875418663025, "step": 1395 }, { "epoch": 3.8220396988364134, "grad_norm": 2.265725612640381, "learning_rate": 8.087671232876712e-07, "log_odds_chosen": 2.0133676528930664, "log_odds_ratio": -0.21575379371643066, "logits/chosen": 0.62775719165802, "logits/rejected": 0.5526186227798462, "logps/chosen": -2.115835428237915, "logps/rejected": -3.988755702972412, "loss": 0.8425, "nll_loss": 0.8208791613578796, "rewards/accuracies": 1.0, "rewards/chosen": -0.21158356964588165, "rewards/margins": 0.18729202449321747, "rewards/rejected": -0.39887556433677673, "step": 1396 }, { "epoch": 3.8247775496235454, "grad_norm": 3.635596513748169, "learning_rate": 8.086301369863014e-07, "log_odds_chosen": 0.8701150417327881, "log_odds_ratio": -0.7370292544364929, "logits/chosen": 1.2073742151260376, "logits/rejected": 1.224785327911377, "logps/chosen": -3.133406639099121, "logps/rejected": -3.9022927284240723, "loss": 0.7496, "nll_loss": 0.6758580207824707, "rewards/accuracies": 0.875, "rewards/chosen": -0.3133406639099121, "rewards/margins": 0.07688860595226288, "rewards/rejected": -0.3902292847633362, "step": 1397 }, { "epoch": 3.8275154004106775, "grad_norm": 2.8430025577545166, "learning_rate": 8.084931506849315e-07, "log_odds_chosen": 1.431117296218872, "log_odds_ratio": -0.3978429138660431, "logits/chosen": 0.7319777011871338, "logits/rejected": 0.675931990146637, "logps/chosen": -2.4111785888671875, "logps/rejected": -3.740468740463257, "loss": 0.7816, "nll_loss": 0.7418583035469055, "rewards/accuracies": 0.875, "rewards/chosen": -0.24111783504486084, "rewards/margins": 0.1329290121793747, "rewards/rejected": -0.37404686212539673, "step": 1398 }, { "epoch": 3.8302532511978096, "grad_norm": 2.450429916381836, "learning_rate": 8.083561643835616e-07, "log_odds_chosen": 1.1562001705169678, "log_odds_ratio": -0.38755321502685547, "logits/chosen": 0.9780750274658203, "logits/rejected": 0.9725297093391418, "logps/chosen": -2.3706915378570557, "logps/rejected": -3.4475064277648926, "loss": 0.7731, "nll_loss": 0.7343838214874268, "rewards/accuracies": 0.75, "rewards/chosen": -0.23706914484500885, "rewards/margins": 0.10768149793148041, "rewards/rejected": -0.34475064277648926, "step": 1399 }, { "epoch": 3.832991101984942, "grad_norm": 2.4066905975341797, "learning_rate": 8.082191780821918e-07, "log_odds_chosen": 1.1238603591918945, "log_odds_ratio": -0.3653010427951813, "logits/chosen": 0.5421079397201538, "logits/rejected": 0.5155237317085266, "logps/chosen": -2.1356201171875, "logps/rejected": -3.168013095855713, "loss": 0.842, "nll_loss": 0.8055163621902466, "rewards/accuracies": 1.0, "rewards/chosen": -0.2135619968175888, "rewards/margins": 0.10323932766914368, "rewards/rejected": -0.3168013095855713, "step": 1400 }, { "epoch": 3.835728952772074, "grad_norm": 3.780984401702881, "learning_rate": 8.080821917808219e-07, "log_odds_chosen": 0.8301492929458618, "log_odds_ratio": -0.6627556681632996, "logits/chosen": 0.6279081702232361, "logits/rejected": 0.645095944404602, "logps/chosen": -2.287229061126709, "logps/rejected": -3.0386850833892822, "loss": 0.9378, "nll_loss": 0.8715298771858215, "rewards/accuracies": 0.75, "rewards/chosen": -0.22872291505336761, "rewards/margins": 0.07514560222625732, "rewards/rejected": -0.30386853218078613, "step": 1401 }, { "epoch": 3.838466803559206, "grad_norm": 2.276648759841919, "learning_rate": 8.07945205479452e-07, "log_odds_chosen": 1.786008596420288, "log_odds_ratio": -0.3614914119243622, "logits/chosen": 1.023713231086731, "logits/rejected": 0.9719071388244629, "logps/chosen": -1.95590078830719, "logps/rejected": -3.620004653930664, "loss": 0.8421, "nll_loss": 0.8059923648834229, "rewards/accuracies": 1.0, "rewards/chosen": -0.195590078830719, "rewards/margins": 0.16641035676002502, "rewards/rejected": -0.3620004653930664, "step": 1402 }, { "epoch": 3.8412046543463383, "grad_norm": 2.2132959365844727, "learning_rate": 8.078082191780822e-07, "log_odds_chosen": 1.1089537143707275, "log_odds_ratio": -0.4164654314517975, "logits/chosen": 0.6613945364952087, "logits/rejected": 0.668695867061615, "logps/chosen": -1.9492664337158203, "logps/rejected": -2.98710298538208, "loss": 0.8958, "nll_loss": 0.8541676998138428, "rewards/accuracies": 0.875, "rewards/chosen": -0.19492663443088531, "rewards/margins": 0.10378366708755493, "rewards/rejected": -0.29871031641960144, "step": 1403 }, { "epoch": 3.8439425051334704, "grad_norm": 3.1395010948181152, "learning_rate": 8.076712328767122e-07, "log_odds_chosen": 1.1600124835968018, "log_odds_ratio": -0.4621726870536804, "logits/chosen": 0.7532013654708862, "logits/rejected": 0.699144721031189, "logps/chosen": -2.0382437705993652, "logps/rejected": -3.115813970565796, "loss": 0.8035, "nll_loss": 0.7572392225265503, "rewards/accuracies": 0.875, "rewards/chosen": -0.20382435619831085, "rewards/margins": 0.10775704681873322, "rewards/rejected": -0.31158140301704407, "step": 1404 }, { "epoch": 3.8466803559206024, "grad_norm": 4.248421669006348, "learning_rate": 8.075342465753424e-07, "log_odds_chosen": 1.1060622930526733, "log_odds_ratio": -0.6306191682815552, "logits/chosen": 0.9514123201370239, "logits/rejected": 0.9106419682502747, "logps/chosen": -3.254361629486084, "logps/rejected": -4.287206649780273, "loss": 0.8086, "nll_loss": 0.7455030679702759, "rewards/accuracies": 0.75, "rewards/chosen": -0.32543617486953735, "rewards/margins": 0.10328446328639984, "rewards/rejected": -0.4287206530570984, "step": 1405 }, { "epoch": 3.8494182067077345, "grad_norm": 2.2860753536224365, "learning_rate": 8.073972602739726e-07, "log_odds_chosen": 0.806206464767456, "log_odds_ratio": -0.45135921239852905, "logits/chosen": 0.769303023815155, "logits/rejected": 0.7341440916061401, "logps/chosen": -1.9963955879211426, "logps/rejected": -2.760242223739624, "loss": 0.8103, "nll_loss": 0.7651674151420593, "rewards/accuracies": 0.875, "rewards/chosen": -0.19963955879211426, "rewards/margins": 0.07638464868068695, "rewards/rejected": -0.2760242223739624, "step": 1406 }, { "epoch": 3.8521560574948666, "grad_norm": 2.9214203357696533, "learning_rate": 8.072602739726027e-07, "log_odds_chosen": 0.2707361578941345, "log_odds_ratio": -0.6482676863670349, "logits/chosen": 1.0234185457229614, "logits/rejected": 1.015917420387268, "logps/chosen": -2.686685085296631, "logps/rejected": -2.9132628440856934, "loss": 0.8124, "nll_loss": 0.7475548386573792, "rewards/accuracies": 0.75, "rewards/chosen": -0.2686685025691986, "rewards/margins": 0.022657765075564384, "rewards/rejected": -0.29132628440856934, "step": 1407 }, { "epoch": 3.8548939082819986, "grad_norm": 5.811946868896484, "learning_rate": 8.071232876712328e-07, "log_odds_chosen": 0.5079224705696106, "log_odds_ratio": -0.8583106994628906, "logits/chosen": 0.8206631541252136, "logits/rejected": 0.8821816444396973, "logps/chosen": -2.8368308544158936, "logps/rejected": -3.311999559402466, "loss": 0.8351, "nll_loss": 0.7493088841438293, "rewards/accuracies": 0.625, "rewards/chosen": -0.28368309140205383, "rewards/margins": 0.04751686006784439, "rewards/rejected": -0.33119997382164, "step": 1408 }, { "epoch": 3.8576317590691307, "grad_norm": 2.131791830062866, "learning_rate": 8.06986301369863e-07, "log_odds_chosen": 2.450852155685425, "log_odds_ratio": -0.20471379160881042, "logits/chosen": 0.883365273475647, "logits/rejected": 0.9111937880516052, "logps/chosen": -1.8743441104888916, "logps/rejected": -4.0991129875183105, "loss": 0.7495, "nll_loss": 0.7290019989013672, "rewards/accuracies": 1.0, "rewards/chosen": -0.18743440508842468, "rewards/margins": 0.22247689962387085, "rewards/rejected": -0.40991130471229553, "step": 1409 }, { "epoch": 3.8603696098562628, "grad_norm": 2.3465731143951416, "learning_rate": 8.068493150684931e-07, "log_odds_chosen": 1.4596635103225708, "log_odds_ratio": -0.29049795866012573, "logits/chosen": 0.7766754627227783, "logits/rejected": 0.7286646962165833, "logps/chosen": -1.6275348663330078, "logps/rejected": -2.874403953552246, "loss": 0.8673, "nll_loss": 0.8382490873336792, "rewards/accuracies": 1.0, "rewards/chosen": -0.16275349259376526, "rewards/margins": 0.12468690425157547, "rewards/rejected": -0.28744038939476013, "step": 1410 }, { "epoch": 3.863107460643395, "grad_norm": 2.8779945373535156, "learning_rate": 8.067123287671232e-07, "log_odds_chosen": 1.4850568771362305, "log_odds_ratio": -0.3263893723487854, "logits/chosen": 0.6902545094490051, "logits/rejected": 0.6633020639419556, "logps/chosen": -2.4518120288848877, "logps/rejected": -3.7995071411132812, "loss": 0.8035, "nll_loss": 0.7708225846290588, "rewards/accuracies": 0.75, "rewards/chosen": -0.24518123269081116, "rewards/margins": 0.1347694993019104, "rewards/rejected": -0.37995073199272156, "step": 1411 }, { "epoch": 3.865845311430527, "grad_norm": 2.415266752243042, "learning_rate": 8.065753424657534e-07, "log_odds_chosen": 1.159058690071106, "log_odds_ratio": -0.4471445083618164, "logits/chosen": 0.9011852741241455, "logits/rejected": 0.9234828948974609, "logps/chosen": -2.3007540702819824, "logps/rejected": -3.386270046234131, "loss": 0.7852, "nll_loss": 0.740533173084259, "rewards/accuracies": 0.75, "rewards/chosen": -0.230075404047966, "rewards/margins": 0.10855163633823395, "rewards/rejected": -0.33862704038619995, "step": 1412 }, { "epoch": 3.868583162217659, "grad_norm": 2.6915056705474854, "learning_rate": 8.064383561643835e-07, "log_odds_chosen": 1.2424052953720093, "log_odds_ratio": -0.5636533498764038, "logits/chosen": 0.5782846808433533, "logits/rejected": 0.4865760803222656, "logps/chosen": -1.999825358390808, "logps/rejected": -3.0790390968322754, "loss": 0.8977, "nll_loss": 0.8413230776786804, "rewards/accuracies": 0.875, "rewards/chosen": -0.19998255372047424, "rewards/margins": 0.10792136192321777, "rewards/rejected": -0.307903915643692, "step": 1413 }, { "epoch": 3.871321013004791, "grad_norm": 3.3900558948516846, "learning_rate": 8.063013698630137e-07, "log_odds_chosen": 1.6839114427566528, "log_odds_ratio": -0.34225642681121826, "logits/chosen": 0.9122930765151978, "logits/rejected": 0.9591209888458252, "logps/chosen": -2.675654649734497, "logps/rejected": -4.273980617523193, "loss": 0.8951, "nll_loss": 0.8608841896057129, "rewards/accuracies": 0.875, "rewards/chosen": -0.2675654888153076, "rewards/margins": 0.15983261168003082, "rewards/rejected": -0.42739808559417725, "step": 1414 }, { "epoch": 3.874058863791923, "grad_norm": 2.106419563293457, "learning_rate": 8.061643835616438e-07, "log_odds_chosen": 1.6723506450653076, "log_odds_ratio": -0.2437257468700409, "logits/chosen": 0.9113203883171082, "logits/rejected": 0.8520525693893433, "logps/chosen": -1.5182147026062012, "logps/rejected": -3.012162923812866, "loss": 0.8242, "nll_loss": 0.7997860908508301, "rewards/accuracies": 1.0, "rewards/chosen": -0.15182147920131683, "rewards/margins": 0.14939481019973755, "rewards/rejected": -0.3012163043022156, "step": 1415 }, { "epoch": 3.876796714579055, "grad_norm": 2.2412822246551514, "learning_rate": 8.060273972602739e-07, "log_odds_chosen": 0.7748862504959106, "log_odds_ratio": -0.45793700218200684, "logits/chosen": 0.6222201585769653, "logits/rejected": 0.5242700576782227, "logps/chosen": -2.2089977264404297, "logps/rejected": -2.9447991847991943, "loss": 0.9564, "nll_loss": 0.9106246829032898, "rewards/accuracies": 0.875, "rewards/chosen": -0.2208997756242752, "rewards/margins": 0.07358013093471527, "rewards/rejected": -0.2944799065589905, "step": 1416 }, { "epoch": 3.8795345653661877, "grad_norm": 2.0457489490509033, "learning_rate": 8.058904109589041e-07, "log_odds_chosen": 1.6986145973205566, "log_odds_ratio": -0.39483919739723206, "logits/chosen": 1.0347368717193604, "logits/rejected": 1.042661428451538, "logps/chosen": -1.8791673183441162, "logps/rejected": -3.4079477787017822, "loss": 0.8398, "nll_loss": 0.8002825975418091, "rewards/accuracies": 0.625, "rewards/chosen": -0.18791674077510834, "rewards/margins": 0.1528780460357666, "rewards/rejected": -0.34079480171203613, "step": 1417 }, { "epoch": 3.8822724161533197, "grad_norm": 2.0549116134643555, "learning_rate": 8.057534246575342e-07, "log_odds_chosen": 1.8102582693099976, "log_odds_ratio": -0.30048319697380066, "logits/chosen": 0.9666380882263184, "logits/rejected": 0.9466925859451294, "logps/chosen": -2.387108087539673, "logps/rejected": -4.119697570800781, "loss": 0.7967, "nll_loss": 0.7666806578636169, "rewards/accuracies": 0.875, "rewards/chosen": -0.23871082067489624, "rewards/margins": 0.17325899004936218, "rewards/rejected": -0.41196978092193604, "step": 1418 }, { "epoch": 3.885010266940452, "grad_norm": 3.134568214416504, "learning_rate": 8.056164383561643e-07, "log_odds_chosen": 0.8756164908409119, "log_odds_ratio": -0.47304975986480713, "logits/chosen": 0.8412266969680786, "logits/rejected": 0.8559533357620239, "logps/chosen": -2.69140362739563, "logps/rejected": -3.452061176300049, "loss": 0.8868, "nll_loss": 0.8395118713378906, "rewards/accuracies": 0.875, "rewards/chosen": -0.2691403329372406, "rewards/margins": 0.07606580853462219, "rewards/rejected": -0.3452061414718628, "step": 1419 }, { "epoch": 3.887748117727584, "grad_norm": 3.8583223819732666, "learning_rate": 8.054794520547945e-07, "log_odds_chosen": 0.03023068793118, "log_odds_ratio": -0.8240746259689331, "logits/chosen": 0.990292489528656, "logits/rejected": 0.9607193470001221, "logps/chosen": -3.061277389526367, "logps/rejected": -3.065927028656006, "loss": 0.9751, "nll_loss": 0.8926551342010498, "rewards/accuracies": 0.625, "rewards/chosen": -0.30612775683403015, "rewards/margins": 0.00046496838331222534, "rewards/rejected": -0.306592732667923, "step": 1420 }, { "epoch": 3.890485968514716, "grad_norm": 4.609938144683838, "learning_rate": 8.053424657534246e-07, "log_odds_chosen": 0.8289036154747009, "log_odds_ratio": -0.5594321489334106, "logits/chosen": 0.7520806193351746, "logits/rejected": 0.814568281173706, "logps/chosen": -3.128046989440918, "logps/rejected": -3.8591105937957764, "loss": 0.8481, "nll_loss": 0.7921971678733826, "rewards/accuracies": 0.75, "rewards/chosen": -0.3128046989440918, "rewards/margins": 0.07310634851455688, "rewards/rejected": -0.3859110176563263, "step": 1421 }, { "epoch": 3.893223819301848, "grad_norm": 3.190526008605957, "learning_rate": 8.052054794520547e-07, "log_odds_chosen": 1.8718926906585693, "log_odds_ratio": -0.2935068607330322, "logits/chosen": 0.5693343877792358, "logits/rejected": 0.49084246158599854, "logps/chosen": -1.831277847290039, "logps/rejected": -3.53348445892334, "loss": 0.8355, "nll_loss": 0.806164562702179, "rewards/accuracies": 1.0, "rewards/chosen": -0.18312779068946838, "rewards/margins": 0.17022061347961426, "rewards/rejected": -0.35334843397140503, "step": 1422 }, { "epoch": 3.89596167008898, "grad_norm": 2.620107650756836, "learning_rate": 8.050684931506849e-07, "log_odds_chosen": 0.952032208442688, "log_odds_ratio": -0.4430750906467438, "logits/chosen": 0.864591658115387, "logits/rejected": 0.8642458915710449, "logps/chosen": -2.5552515983581543, "logps/rejected": -3.4371719360351562, "loss": 0.8253, "nll_loss": 0.7810052633285522, "rewards/accuracies": 0.75, "rewards/chosen": -0.2555251717567444, "rewards/margins": 0.08819204568862915, "rewards/rejected": -0.34371721744537354, "step": 1423 }, { "epoch": 3.898699520876112, "grad_norm": 3.6267926692962646, "learning_rate": 8.04931506849315e-07, "log_odds_chosen": 1.2172863483428955, "log_odds_ratio": -0.7110518217086792, "logits/chosen": 0.7835668325424194, "logits/rejected": 0.6649975776672363, "logps/chosen": -2.929615020751953, "logps/rejected": -4.065134525299072, "loss": 0.9275, "nll_loss": 0.8564152717590332, "rewards/accuracies": 0.625, "rewards/chosen": -0.2929615378379822, "rewards/margins": 0.11355195939540863, "rewards/rejected": -0.4065134823322296, "step": 1424 }, { "epoch": 3.9014373716632442, "grad_norm": 2.1041781902313232, "learning_rate": 8.047945205479451e-07, "log_odds_chosen": 2.237203598022461, "log_odds_ratio": -0.2838314175605774, "logits/chosen": 0.47965511679649353, "logits/rejected": 0.34626632928848267, "logps/chosen": -1.7775945663452148, "logps/rejected": -3.876204013824463, "loss": 0.865, "nll_loss": 0.8366519212722778, "rewards/accuracies": 0.875, "rewards/chosen": -0.17775945365428925, "rewards/margins": 0.20986098051071167, "rewards/rejected": -0.3876204192638397, "step": 1425 }, { "epoch": 3.9041752224503763, "grad_norm": 3.0346171855926514, "learning_rate": 8.046575342465753e-07, "log_odds_chosen": 1.4238272905349731, "log_odds_ratio": -0.39518633484840393, "logits/chosen": 0.9417628645896912, "logits/rejected": 1.0168724060058594, "logps/chosen": -2.897834062576294, "logps/rejected": -4.253503799438477, "loss": 0.8371, "nll_loss": 0.7975948452949524, "rewards/accuracies": 0.75, "rewards/chosen": -0.28978341817855835, "rewards/margins": 0.13556697964668274, "rewards/rejected": -0.4253503680229187, "step": 1426 }, { "epoch": 3.906913073237509, "grad_norm": 3.48388671875, "learning_rate": 8.045205479452054e-07, "log_odds_chosen": 0.254940390586853, "log_odds_ratio": -0.8179802894592285, "logits/chosen": 0.8508687019348145, "logits/rejected": 0.8904701471328735, "logps/chosen": -2.8998496532440186, "logps/rejected": -3.152894973754883, "loss": 0.8824, "nll_loss": 0.8006035685539246, "rewards/accuracies": 0.5, "rewards/chosen": -0.28998494148254395, "rewards/margins": 0.025304529815912247, "rewards/rejected": -0.3152894973754883, "step": 1427 }, { "epoch": 3.909650924024641, "grad_norm": 3.5613675117492676, "learning_rate": 8.043835616438356e-07, "log_odds_chosen": 2.126823902130127, "log_odds_ratio": -0.6835551857948303, "logits/chosen": 0.9778028726577759, "logits/rejected": 0.9867013692855835, "logps/chosen": -2.3914709091186523, "logps/rejected": -4.464504241943359, "loss": 0.8992, "nll_loss": 0.8307977318763733, "rewards/accuracies": 0.5, "rewards/chosen": -0.23914708197116852, "rewards/margins": 0.20730334520339966, "rewards/rejected": -0.446450412273407, "step": 1428 }, { "epoch": 3.912388774811773, "grad_norm": 2.8694496154785156, "learning_rate": 8.042465753424657e-07, "log_odds_chosen": 1.1297310590744019, "log_odds_ratio": -0.4841606914997101, "logits/chosen": 0.8513253927230835, "logits/rejected": 0.8126022815704346, "logps/chosen": -2.6582248210906982, "logps/rejected": -3.725233316421509, "loss": 0.8897, "nll_loss": 0.8412688970565796, "rewards/accuracies": 0.625, "rewards/chosen": -0.26582247018814087, "rewards/margins": 0.10670085996389389, "rewards/rejected": -0.37252336740493774, "step": 1429 }, { "epoch": 3.915126625598905, "grad_norm": 2.124319076538086, "learning_rate": 8.041095890410958e-07, "log_odds_chosen": 2.59745454788208, "log_odds_ratio": -0.28943511843681335, "logits/chosen": 0.7562879323959351, "logits/rejected": 0.7393187880516052, "logps/chosen": -2.1624250411987305, "logps/rejected": -4.655850410461426, "loss": 0.8338, "nll_loss": 0.8048424124717712, "rewards/accuracies": 1.0, "rewards/chosen": -0.21624252200126648, "rewards/margins": 0.24934256076812744, "rewards/rejected": -0.4655850827693939, "step": 1430 }, { "epoch": 3.917864476386037, "grad_norm": 3.1878249645233154, "learning_rate": 8.03972602739726e-07, "log_odds_chosen": 1.498064637184143, "log_odds_ratio": -0.3676995635032654, "logits/chosen": 1.08901846408844, "logits/rejected": 1.0417790412902832, "logps/chosen": -2.9555439949035645, "logps/rejected": -4.361232280731201, "loss": 0.8482, "nll_loss": 0.8114475011825562, "rewards/accuracies": 0.75, "rewards/chosen": -0.29555439949035645, "rewards/margins": 0.140568807721138, "rewards/rejected": -0.43612322211265564, "step": 1431 }, { "epoch": 3.920602327173169, "grad_norm": 2.6158053874969482, "learning_rate": 8.038356164383561e-07, "log_odds_chosen": 1.3958230018615723, "log_odds_ratio": -0.31793493032455444, "logits/chosen": 0.5951333045959473, "logits/rejected": 0.561390221118927, "logps/chosen": -2.1831114292144775, "logps/rejected": -3.4560868740081787, "loss": 0.8229, "nll_loss": 0.7910923957824707, "rewards/accuracies": 1.0, "rewards/chosen": -0.2183111608028412, "rewards/margins": 0.1272975504398346, "rewards/rejected": -0.3456087112426758, "step": 1432 }, { "epoch": 3.923340177960301, "grad_norm": 2.2699544429779053, "learning_rate": 8.036986301369862e-07, "log_odds_chosen": 2.018004894256592, "log_odds_ratio": -0.2922894358634949, "logits/chosen": 0.7771443128585815, "logits/rejected": 0.7691035270690918, "logps/chosen": -2.993713617324829, "logps/rejected": -4.763916015625, "loss": 0.881, "nll_loss": 0.8518153429031372, "rewards/accuracies": 0.875, "rewards/chosen": -0.2993713617324829, "rewards/margins": 0.17702025175094604, "rewards/rejected": -0.47639161348342896, "step": 1433 }, { "epoch": 3.9260780287474333, "grad_norm": 2.754840135574341, "learning_rate": 8.035616438356164e-07, "log_odds_chosen": 0.6440228223800659, "log_odds_ratio": -0.4645014703273773, "logits/chosen": 0.744157612323761, "logits/rejected": 0.7187560796737671, "logps/chosen": -2.6080636978149414, "logps/rejected": -3.1945712566375732, "loss": 0.8914, "nll_loss": 0.8449510335922241, "rewards/accuracies": 0.875, "rewards/chosen": -0.2608063817024231, "rewards/margins": 0.05865075811743736, "rewards/rejected": -0.31945711374282837, "step": 1434 }, { "epoch": 3.9288158795345653, "grad_norm": 1.8530086278915405, "learning_rate": 8.034246575342465e-07, "log_odds_chosen": 1.8287756443023682, "log_odds_ratio": -0.2358807623386383, "logits/chosen": 0.9268983602523804, "logits/rejected": 0.9721277952194214, "logps/chosen": -1.9465446472167969, "logps/rejected": -3.6297948360443115, "loss": 0.7307, "nll_loss": 0.7071353197097778, "rewards/accuracies": 1.0, "rewards/chosen": -0.1946544647216797, "rewards/margins": 0.1683250367641449, "rewards/rejected": -0.3629795014858246, "step": 1435 }, { "epoch": 3.9315537303216974, "grad_norm": 2.779841661453247, "learning_rate": 8.032876712328766e-07, "log_odds_chosen": 1.6306902170181274, "log_odds_ratio": -0.34784820675849915, "logits/chosen": 1.078263759613037, "logits/rejected": 1.0906835794448853, "logps/chosen": -2.025973081588745, "logps/rejected": -3.5074000358581543, "loss": 0.6852, "nll_loss": 0.650384247303009, "rewards/accuracies": 0.875, "rewards/chosen": -0.20259730517864227, "rewards/margins": 0.1481427252292633, "rewards/rejected": -0.3507400155067444, "step": 1436 }, { "epoch": 3.9342915811088295, "grad_norm": 3.0646326541900635, "learning_rate": 8.031506849315068e-07, "log_odds_chosen": 0.7629811763763428, "log_odds_ratio": -0.6828715801239014, "logits/chosen": 0.5207920074462891, "logits/rejected": 0.4702116847038269, "logps/chosen": -2.3839163780212402, "logps/rejected": -3.054258108139038, "loss": 0.8303, "nll_loss": 0.7619702219963074, "rewards/accuracies": 0.75, "rewards/chosen": -0.23839163780212402, "rewards/margins": 0.06703415513038635, "rewards/rejected": -0.3054257929325104, "step": 1437 }, { "epoch": 3.9370294318959616, "grad_norm": 2.0674126148223877, "learning_rate": 8.030136986301369e-07, "log_odds_chosen": 1.238644003868103, "log_odds_ratio": -0.30891135334968567, "logits/chosen": 0.6298385858535767, "logits/rejected": 0.5833731293678284, "logps/chosen": -2.0411760807037354, "logps/rejected": -3.158885955810547, "loss": 0.8021, "nll_loss": 0.7711782455444336, "rewards/accuracies": 1.0, "rewards/chosen": -0.20411761105060577, "rewards/margins": 0.11177098006010056, "rewards/rejected": -0.3158886134624481, "step": 1438 }, { "epoch": 3.9397672826830936, "grad_norm": 2.780926465988159, "learning_rate": 8.02876712328767e-07, "log_odds_chosen": 1.3445024490356445, "log_odds_ratio": -0.25016799569129944, "logits/chosen": 1.053132176399231, "logits/rejected": 1.1218682527542114, "logps/chosen": -2.706791400909424, "logps/rejected": -3.971935272216797, "loss": 0.7164, "nll_loss": 0.6913563013076782, "rewards/accuracies": 1.0, "rewards/chosen": -0.27067917585372925, "rewards/margins": 0.12651436030864716, "rewards/rejected": -0.3971935212612152, "step": 1439 }, { "epoch": 3.9425051334702257, "grad_norm": 2.2717671394348145, "learning_rate": 8.027397260273972e-07, "log_odds_chosen": 1.6142560243606567, "log_odds_ratio": -0.2946288287639618, "logits/chosen": 0.5860264301300049, "logits/rejected": 0.5284799337387085, "logps/chosen": -1.3366377353668213, "logps/rejected": -2.6868226528167725, "loss": 0.8354, "nll_loss": 0.8059614896774292, "rewards/accuracies": 1.0, "rewards/chosen": -0.13366377353668213, "rewards/margins": 0.1350184977054596, "rewards/rejected": -0.2686823010444641, "step": 1440 }, { "epoch": 3.9452429842573578, "grad_norm": 2.147447109222412, "learning_rate": 8.026027397260273e-07, "log_odds_chosen": 1.8760437965393066, "log_odds_ratio": -0.2560359835624695, "logits/chosen": 0.5603770613670349, "logits/rejected": 0.45692649483680725, "logps/chosen": -1.8371764421463013, "logps/rejected": -3.4825212955474854, "loss": 0.9625, "nll_loss": 0.9368842840194702, "rewards/accuracies": 0.875, "rewards/chosen": -0.18371765315532684, "rewards/margins": 0.16453450918197632, "rewards/rejected": -0.3482521176338196, "step": 1441 }, { "epoch": 3.94798083504449, "grad_norm": 2.7614428997039795, "learning_rate": 8.024657534246575e-07, "log_odds_chosen": 1.4371849298477173, "log_odds_ratio": -0.27503710985183716, "logits/chosen": 0.8512588739395142, "logits/rejected": 0.8800190687179565, "logps/chosen": -2.9754393100738525, "logps/rejected": -4.360550403594971, "loss": 0.8777, "nll_loss": 0.8501837253570557, "rewards/accuracies": 1.0, "rewards/chosen": -0.2975439429283142, "rewards/margins": 0.13851110637187958, "rewards/rejected": -0.436055064201355, "step": 1442 }, { "epoch": 3.9507186858316223, "grad_norm": 4.431591033935547, "learning_rate": 8.023287671232876e-07, "log_odds_chosen": 0.0650566816329956, "log_odds_ratio": -0.9014371037483215, "logits/chosen": 0.8554444909095764, "logits/rejected": 0.8087263107299805, "logps/chosen": -2.994986057281494, "logps/rejected": -2.98667049407959, "loss": 0.8554, "nll_loss": 0.7652493119239807, "rewards/accuracies": 0.75, "rewards/chosen": -0.29949861764907837, "rewards/margins": -0.0008315611630678177, "rewards/rejected": -0.2986670434474945, "step": 1443 }, { "epoch": 3.9534565366187544, "grad_norm": 2.1056745052337646, "learning_rate": 8.021917808219177e-07, "log_odds_chosen": 1.2892693281173706, "log_odds_ratio": -0.35922864079475403, "logits/chosen": 0.8026098608970642, "logits/rejected": 0.7674973011016846, "logps/chosen": -2.1238481998443604, "logps/rejected": -3.3085825443267822, "loss": 0.8781, "nll_loss": 0.8421591520309448, "rewards/accuracies": 1.0, "rewards/chosen": -0.21238481998443604, "rewards/margins": 0.11847344040870667, "rewards/rejected": -0.3308582603931427, "step": 1444 }, { "epoch": 3.9561943874058865, "grad_norm": 1.743621587753296, "learning_rate": 8.02054794520548e-07, "log_odds_chosen": 2.653193712234497, "log_odds_ratio": -0.21280497312545776, "logits/chosen": 0.7887355089187622, "logits/rejected": 0.7302263975143433, "logps/chosen": -1.8930613994598389, "logps/rejected": -4.4250688552856445, "loss": 0.7917, "nll_loss": 0.7704145908355713, "rewards/accuracies": 1.0, "rewards/chosen": -0.1893061399459839, "rewards/margins": 0.2532007694244385, "rewards/rejected": -0.44250690937042236, "step": 1445 }, { "epoch": 3.9589322381930185, "grad_norm": 3.2963955402374268, "learning_rate": 8.01917808219178e-07, "log_odds_chosen": 0.2621297240257263, "log_odds_ratio": -1.0549287796020508, "logits/chosen": 0.7696439623832703, "logits/rejected": 0.8135352730751038, "logps/chosen": -2.366218090057373, "logps/rejected": -2.6255946159362793, "loss": 0.8276, "nll_loss": 0.722069501876831, "rewards/accuracies": 0.5, "rewards/chosen": -0.23662179708480835, "rewards/margins": 0.025937668979167938, "rewards/rejected": -0.2625594735145569, "step": 1446 }, { "epoch": 3.9616700889801506, "grad_norm": 3.0834624767303467, "learning_rate": 8.017808219178081e-07, "log_odds_chosen": 1.199125051498413, "log_odds_ratio": -0.3996695280075073, "logits/chosen": 0.8021425604820251, "logits/rejected": 0.7878215909004211, "logps/chosen": -2.5297603607177734, "logps/rejected": -3.610901355743408, "loss": 0.7478, "nll_loss": 0.7077902555465698, "rewards/accuracies": 0.75, "rewards/chosen": -0.25297603011131287, "rewards/margins": 0.10811412334442139, "rewards/rejected": -0.36109012365341187, "step": 1447 }, { "epoch": 3.9644079397672827, "grad_norm": 2.181865692138672, "learning_rate": 8.016438356164384e-07, "log_odds_chosen": 2.1730430126190186, "log_odds_ratio": -0.16181322932243347, "logits/chosen": 0.8643329739570618, "logits/rejected": 0.8469746112823486, "logps/chosen": -2.4593677520751953, "logps/rejected": -4.523070335388184, "loss": 0.8633, "nll_loss": 0.8471546173095703, "rewards/accuracies": 1.0, "rewards/chosen": -0.245936781167984, "rewards/margins": 0.20637021958827972, "rewards/rejected": -0.4523070156574249, "step": 1448 }, { "epoch": 3.9671457905544147, "grad_norm": 2.435685634613037, "learning_rate": 8.015068493150686e-07, "log_odds_chosen": 1.6855666637420654, "log_odds_ratio": -0.25842127203941345, "logits/chosen": 0.7696842551231384, "logits/rejected": 0.748263955116272, "logps/chosen": -2.056715250015259, "logps/rejected": -3.6163666248321533, "loss": 0.8279, "nll_loss": 0.8020660281181335, "rewards/accuracies": 1.0, "rewards/chosen": -0.2056715190410614, "rewards/margins": 0.1559651494026184, "rewards/rejected": -0.3616366386413574, "step": 1449 }, { "epoch": 3.969883641341547, "grad_norm": 2.7239394187927246, "learning_rate": 8.013698630136985e-07, "log_odds_chosen": 0.7407596111297607, "log_odds_ratio": -0.4996596574783325, "logits/chosen": 0.5188037157058716, "logits/rejected": 0.4676290452480316, "logps/chosen": -2.1993422508239746, "logps/rejected": -2.860405445098877, "loss": 0.8106, "nll_loss": 0.7606403827667236, "rewards/accuracies": 0.75, "rewards/chosen": -0.21993421018123627, "rewards/margins": 0.06610631942749023, "rewards/rejected": -0.2860405445098877, "step": 1450 }, { "epoch": 3.972621492128679, "grad_norm": 2.977140188217163, "learning_rate": 8.012328767123288e-07, "log_odds_chosen": 1.2714921236038208, "log_odds_ratio": -0.3331485986709595, "logits/chosen": 0.8966862559318542, "logits/rejected": 0.8765880465507507, "logps/chosen": -2.4279356002807617, "logps/rejected": -3.6251606941223145, "loss": 0.9027, "nll_loss": 0.8693486452102661, "rewards/accuracies": 1.0, "rewards/chosen": -0.24279356002807617, "rewards/margins": 0.11972251534461975, "rewards/rejected": -0.36251604557037354, "step": 1451 }, { "epoch": 3.975359342915811, "grad_norm": 2.870574951171875, "learning_rate": 8.010958904109589e-07, "log_odds_chosen": 2.489928722381592, "log_odds_ratio": -0.3984125256538391, "logits/chosen": 1.073434591293335, "logits/rejected": 1.032069444656372, "logps/chosen": -2.6810879707336426, "logps/rejected": -5.096837520599365, "loss": 0.7933, "nll_loss": 0.7534441351890564, "rewards/accuracies": 0.875, "rewards/chosen": -0.2681087851524353, "rewards/margins": 0.2415749430656433, "rewards/rejected": -0.5096837282180786, "step": 1452 }, { "epoch": 3.9780971937029435, "grad_norm": 3.709260940551758, "learning_rate": 8.00958904109589e-07, "log_odds_chosen": -0.4284459352493286, "log_odds_ratio": -1.241798758506775, "logits/chosen": 1.0719966888427734, "logits/rejected": 1.1174761056900024, "logps/chosen": -3.880582094192505, "logps/rejected": -3.4377217292785645, "loss": 0.9244, "nll_loss": 0.8002400994300842, "rewards/accuracies": 0.625, "rewards/chosen": -0.38805821537971497, "rewards/margins": -0.04428606480360031, "rewards/rejected": -0.34377217292785645, "step": 1453 }, { "epoch": 3.9808350444900755, "grad_norm": 2.2889065742492676, "learning_rate": 8.008219178082192e-07, "log_odds_chosen": 2.0677056312561035, "log_odds_ratio": -0.24535468220710754, "logits/chosen": 0.8264038562774658, "logits/rejected": 0.8316439390182495, "logps/chosen": -2.0197460651397705, "logps/rejected": -3.9469358921051025, "loss": 0.752, "nll_loss": 0.7274685502052307, "rewards/accuracies": 1.0, "rewards/chosen": -0.20197460055351257, "rewards/margins": 0.1927190124988556, "rewards/rejected": -0.3946935832500458, "step": 1454 }, { "epoch": 3.9835728952772076, "grad_norm": 3.021561622619629, "learning_rate": 8.006849315068493e-07, "log_odds_chosen": 1.2877821922302246, "log_odds_ratio": -0.3213984966278076, "logits/chosen": 0.913985550403595, "logits/rejected": 0.9158440828323364, "logps/chosen": -2.3515162467956543, "logps/rejected": -3.541898250579834, "loss": 0.8581, "nll_loss": 0.8259568214416504, "rewards/accuracies": 1.0, "rewards/chosen": -0.23515164852142334, "rewards/margins": 0.11903820186853409, "rewards/rejected": -0.35418984293937683, "step": 1455 }, { "epoch": 3.9863107460643397, "grad_norm": 2.256608247756958, "learning_rate": 8.005479452054795e-07, "log_odds_chosen": 1.9343656301498413, "log_odds_ratio": -0.3708032965660095, "logits/chosen": 1.0108460187911987, "logits/rejected": 1.0193634033203125, "logps/chosen": -2.014163017272949, "logps/rejected": -3.8086585998535156, "loss": 0.8411, "nll_loss": 0.8040578365325928, "rewards/accuracies": 0.75, "rewards/chosen": -0.20141631364822388, "rewards/margins": 0.17944951355457306, "rewards/rejected": -0.38086581230163574, "step": 1456 }, { "epoch": 3.9890485968514717, "grad_norm": 2.381002426147461, "learning_rate": 8.004109589041096e-07, "log_odds_chosen": 2.171149730682373, "log_odds_ratio": -0.4149959087371826, "logits/chosen": 0.6479424834251404, "logits/rejected": 0.6462337970733643, "logps/chosen": -2.299126625061035, "logps/rejected": -4.404521942138672, "loss": 0.8976, "nll_loss": 0.8561201095581055, "rewards/accuracies": 0.75, "rewards/chosen": -0.2299126535654068, "rewards/margins": 0.2105395644903183, "rewards/rejected": -0.4404522180557251, "step": 1457 }, { "epoch": 3.991786447638604, "grad_norm": 2.332444906234741, "learning_rate": 8.002739726027397e-07, "log_odds_chosen": 1.6795518398284912, "log_odds_ratio": -0.37974828481674194, "logits/chosen": 0.7664941549301147, "logits/rejected": 0.7603738903999329, "logps/chosen": -1.7160392999649048, "logps/rejected": -3.3005385398864746, "loss": 0.7704, "nll_loss": 0.7324188351631165, "rewards/accuracies": 0.75, "rewards/chosen": -0.17160393297672272, "rewards/margins": 0.1584499478340149, "rewards/rejected": -0.3300538659095764, "step": 1458 }, { "epoch": 3.994524298425736, "grad_norm": 2.0067837238311768, "learning_rate": 8.001369863013699e-07, "log_odds_chosen": 1.9164663553237915, "log_odds_ratio": -0.28823375701904297, "logits/chosen": 1.039452075958252, "logits/rejected": 1.0242791175842285, "logps/chosen": -1.838722825050354, "logps/rejected": -3.65202260017395, "loss": 0.7515, "nll_loss": 0.7226267457008362, "rewards/accuracies": 0.75, "rewards/chosen": -0.1838722825050354, "rewards/margins": 0.18132996559143066, "rewards/rejected": -0.36520224809646606, "step": 1459 }, { "epoch": 3.997262149212868, "grad_norm": 2.1843554973602295, "learning_rate": 8e-07, "log_odds_chosen": 1.50777006149292, "log_odds_ratio": -0.3118976950645447, "logits/chosen": 0.8299123048782349, "logits/rejected": 0.8154813051223755, "logps/chosen": -1.7052943706512451, "logps/rejected": -3.055427312850952, "loss": 0.8286, "nll_loss": 0.7974191904067993, "rewards/accuracies": 1.0, "rewards/chosen": -0.17052942514419556, "rewards/margins": 0.13501331210136414, "rewards/rejected": -0.3055427670478821, "step": 1460 }, { "epoch": 4.0, "grad_norm": 2.0051803588867188, "learning_rate": 7.998630136986301e-07, "log_odds_chosen": 1.8266897201538086, "log_odds_ratio": -0.251956045627594, "logits/chosen": 0.9891754388809204, "logits/rejected": 0.9411988258361816, "logps/chosen": -1.9461641311645508, "logps/rejected": -3.605860710144043, "loss": 0.8665, "nll_loss": 0.8413208723068237, "rewards/accuracies": 1.0, "rewards/chosen": -0.1946164220571518, "rewards/margins": 0.16596966981887817, "rewards/rejected": -0.36058610677719116, "step": 1461 }, { "epoch": 4.002737850787132, "grad_norm": 3.5634725093841553, "learning_rate": 7.997260273972603e-07, "log_odds_chosen": 1.7803785800933838, "log_odds_ratio": -0.5433878302574158, "logits/chosen": 1.006040096282959, "logits/rejected": 1.0221619606018066, "logps/chosen": -2.692487955093384, "logps/rejected": -4.384790420532227, "loss": 0.7928, "nll_loss": 0.738492488861084, "rewards/accuracies": 0.75, "rewards/chosen": -0.2692488133907318, "rewards/margins": 0.16923019289970398, "rewards/rejected": -0.4384790062904358, "step": 1462 }, { "epoch": 4.005475701574264, "grad_norm": 2.2019824981689453, "learning_rate": 7.995890410958905e-07, "log_odds_chosen": 2.7136611938476562, "log_odds_ratio": -0.1120036393404007, "logits/chosen": 1.1711139678955078, "logits/rejected": 1.1917095184326172, "logps/chosen": -1.8414363861083984, "logps/rejected": -4.322589874267578, "loss": 0.673, "nll_loss": 0.6618043184280396, "rewards/accuracies": 1.0, "rewards/chosen": -0.18414363265037537, "rewards/margins": 0.24811536073684692, "rewards/rejected": -0.4322589635848999, "step": 1463 }, { "epoch": 4.008213552361396, "grad_norm": 2.3506836891174316, "learning_rate": 7.994520547945205e-07, "log_odds_chosen": 1.6419841051101685, "log_odds_ratio": -0.2832420766353607, "logits/chosen": 1.1602494716644287, "logits/rejected": 1.1689924001693726, "logps/chosen": -1.988376498222351, "logps/rejected": -3.489950656890869, "loss": 0.6855, "nll_loss": 0.6571567058563232, "rewards/accuracies": 0.875, "rewards/chosen": -0.19883763790130615, "rewards/margins": 0.15015742182731628, "rewards/rejected": -0.34899505972862244, "step": 1464 }, { "epoch": 4.010951403148528, "grad_norm": 2.415881395339966, "learning_rate": 7.993150684931507e-07, "log_odds_chosen": 1.8863846063613892, "log_odds_ratio": -0.36178892850875854, "logits/chosen": 1.0336360931396484, "logits/rejected": 1.0660768747329712, "logps/chosen": -2.389902114868164, "logps/rejected": -4.165361404418945, "loss": 0.778, "nll_loss": 0.7418042421340942, "rewards/accuracies": 0.875, "rewards/chosen": -0.23899021744728088, "rewards/margins": 0.1775459349155426, "rewards/rejected": -0.4165361523628235, "step": 1465 }, { "epoch": 4.01368925393566, "grad_norm": 2.028925657272339, "learning_rate": 7.991780821917808e-07, "log_odds_chosen": 0.98691725730896, "log_odds_ratio": -0.4709959626197815, "logits/chosen": 0.8215956687927246, "logits/rejected": 0.7942213416099548, "logps/chosen": -2.09228515625, "logps/rejected": -2.9907453060150146, "loss": 0.8391, "nll_loss": 0.7920244336128235, "rewards/accuracies": 0.75, "rewards/chosen": -0.209228515625, "rewards/margins": 0.08984600007534027, "rewards/rejected": -0.29907453060150146, "step": 1466 }, { "epoch": 4.016427104722792, "grad_norm": 2.475032091140747, "learning_rate": 7.990410958904109e-07, "log_odds_chosen": 1.2494826316833496, "log_odds_ratio": -0.42353081703186035, "logits/chosen": 0.8153056502342224, "logits/rejected": 0.778445839881897, "logps/chosen": -1.958038568496704, "logps/rejected": -3.045686721801758, "loss": 0.7944, "nll_loss": 0.7520346641540527, "rewards/accuracies": 0.875, "rewards/chosen": -0.19580385088920593, "rewards/margins": 0.10876479744911194, "rewards/rejected": -0.30456864833831787, "step": 1467 }, { "epoch": 4.0191649555099245, "grad_norm": 2.165071725845337, "learning_rate": 7.989041095890411e-07, "log_odds_chosen": 1.0666509866714478, "log_odds_ratio": -0.3353346586227417, "logits/chosen": 0.7615405917167664, "logits/rejected": 0.6637095808982849, "logps/chosen": -1.6696912050247192, "logps/rejected": -2.5672943592071533, "loss": 0.8754, "nll_loss": 0.8418946266174316, "rewards/accuracies": 1.0, "rewards/chosen": -0.16696912050247192, "rewards/margins": 0.08976031094789505, "rewards/rejected": -0.25672945380210876, "step": 1468 }, { "epoch": 4.0219028062970565, "grad_norm": 3.8725967407226562, "learning_rate": 7.987671232876712e-07, "log_odds_chosen": 0.49721652269363403, "log_odds_ratio": -0.6134469509124756, "logits/chosen": 0.8239164352416992, "logits/rejected": 0.8340326547622681, "logps/chosen": -2.9762346744537354, "logps/rejected": -3.431502342224121, "loss": 0.8071, "nll_loss": 0.7457574605941772, "rewards/accuracies": 0.75, "rewards/chosen": -0.29762348532676697, "rewards/margins": 0.04552676901221275, "rewards/rejected": -0.34315022826194763, "step": 1469 }, { "epoch": 4.024640657084189, "grad_norm": 2.8690459728240967, "learning_rate": 7.986301369863014e-07, "log_odds_chosen": 2.182898759841919, "log_odds_ratio": -0.3998297452926636, "logits/chosen": 0.7537090182304382, "logits/rejected": 0.6794362664222717, "logps/chosen": -1.849700689315796, "logps/rejected": -3.7651214599609375, "loss": 0.8246, "nll_loss": 0.7845925092697144, "rewards/accuracies": 0.875, "rewards/chosen": -0.18497008085250854, "rewards/margins": 0.19154205918312073, "rewards/rejected": -0.3765121102333069, "step": 1470 }, { "epoch": 4.027378507871321, "grad_norm": 2.6076653003692627, "learning_rate": 7.984931506849315e-07, "log_odds_chosen": 0.9697445631027222, "log_odds_ratio": -0.44843655824661255, "logits/chosen": 0.7375872731208801, "logits/rejected": 0.7037838697433472, "logps/chosen": -1.9814598560333252, "logps/rejected": -2.8875467777252197, "loss": 0.8625, "nll_loss": 0.8176611661911011, "rewards/accuracies": 0.75, "rewards/chosen": -0.19814598560333252, "rewards/margins": 0.09060868620872498, "rewards/rejected": -0.2887546718120575, "step": 1471 }, { "epoch": 4.030116358658453, "grad_norm": 2.6541755199432373, "learning_rate": 7.983561643835616e-07, "log_odds_chosen": 1.7868585586547852, "log_odds_ratio": -0.2554778456687927, "logits/chosen": 0.6435821056365967, "logits/rejected": 0.5235811471939087, "logps/chosen": -1.9173346757888794, "logps/rejected": -3.439093589782715, "loss": 0.7624, "nll_loss": 0.7368672490119934, "rewards/accuracies": 0.875, "rewards/chosen": -0.1917334794998169, "rewards/margins": 0.1521759033203125, "rewards/rejected": -0.3439094126224518, "step": 1472 }, { "epoch": 4.032854209445585, "grad_norm": 2.2151644229888916, "learning_rate": 7.982191780821918e-07, "log_odds_chosen": 1.4773119688034058, "log_odds_ratio": -0.30725422501564026, "logits/chosen": 0.7688084840774536, "logits/rejected": 0.759752631187439, "logps/chosen": -2.0310988426208496, "logps/rejected": -3.4177935123443604, "loss": 0.7733, "nll_loss": 0.7426061630249023, "rewards/accuracies": 0.875, "rewards/chosen": -0.20310989022254944, "rewards/margins": 0.1386694610118866, "rewards/rejected": -0.34177935123443604, "step": 1473 }, { "epoch": 4.035592060232717, "grad_norm": 1.9122838973999023, "learning_rate": 7.980821917808219e-07, "log_odds_chosen": 1.5347676277160645, "log_odds_ratio": -0.33925458788871765, "logits/chosen": 0.8719556331634521, "logits/rejected": 0.840766191482544, "logps/chosen": -1.822160005569458, "logps/rejected": -3.2094802856445312, "loss": 0.7739, "nll_loss": 0.7399392127990723, "rewards/accuracies": 0.875, "rewards/chosen": -0.18221600353717804, "rewards/margins": 0.13873204588890076, "rewards/rejected": -0.32094806432724, "step": 1474 }, { "epoch": 4.03832991101985, "grad_norm": 2.2699408531188965, "learning_rate": 7.97945205479452e-07, "log_odds_chosen": 0.9024395942687988, "log_odds_ratio": -0.37450486421585083, "logits/chosen": 0.6867156624794006, "logits/rejected": 0.6413406729698181, "logps/chosen": -1.6377661228179932, "logps/rejected": -2.397411346435547, "loss": 0.8217, "nll_loss": 0.7842176556587219, "rewards/accuracies": 1.0, "rewards/chosen": -0.16377662122249603, "rewards/margins": 0.07596452534198761, "rewards/rejected": -0.23974114656448364, "step": 1475 }, { "epoch": 4.041067761806982, "grad_norm": 2.06545352935791, "learning_rate": 7.978082191780822e-07, "log_odds_chosen": 0.9794946908950806, "log_odds_ratio": -0.4162137806415558, "logits/chosen": 0.6575497388839722, "logits/rejected": 0.6423018574714661, "logps/chosen": -2.023394823074341, "logps/rejected": -2.9402964115142822, "loss": 0.8148, "nll_loss": 0.7731631398200989, "rewards/accuracies": 0.875, "rewards/chosen": -0.20233947038650513, "rewards/margins": 0.09169016778469086, "rewards/rejected": -0.2940296530723572, "step": 1476 }, { "epoch": 4.043805612594114, "grad_norm": 3.449528217315674, "learning_rate": 7.976712328767124e-07, "log_odds_chosen": 1.05851411819458, "log_odds_ratio": -0.5841258764266968, "logits/chosen": 0.9028782844543457, "logits/rejected": 0.9136875867843628, "logps/chosen": -2.501887798309326, "logps/rejected": -3.4655520915985107, "loss": 0.8276, "nll_loss": 0.7691963315010071, "rewards/accuracies": 0.75, "rewards/chosen": -0.25018879771232605, "rewards/margins": 0.09636643528938293, "rewards/rejected": -0.346555233001709, "step": 1477 }, { "epoch": 4.046543463381246, "grad_norm": 3.5788919925689697, "learning_rate": 7.975342465753424e-07, "log_odds_chosen": 1.779914379119873, "log_odds_ratio": -0.5162510275840759, "logits/chosen": 0.6020139455795288, "logits/rejected": 0.6267389059066772, "logps/chosen": -2.7722034454345703, "logps/rejected": -4.494307041168213, "loss": 0.9879, "nll_loss": 0.9362570643424988, "rewards/accuracies": 0.75, "rewards/chosen": -0.27722036838531494, "rewards/margins": 0.17221033573150635, "rewards/rejected": -0.4494307041168213, "step": 1478 }, { "epoch": 4.049281314168378, "grad_norm": 1.952715277671814, "learning_rate": 7.973972602739726e-07, "log_odds_chosen": 2.7335779666900635, "log_odds_ratio": -0.20200219750404358, "logits/chosen": 0.9085662961006165, "logits/rejected": 0.8347104787826538, "logps/chosen": -1.7839603424072266, "logps/rejected": -4.3463664054870605, "loss": 0.7663, "nll_loss": 0.7460981011390686, "rewards/accuracies": 1.0, "rewards/chosen": -0.1783960461616516, "rewards/margins": 0.256240576505661, "rewards/rejected": -0.4346366226673126, "step": 1479 }, { "epoch": 4.05201916495551, "grad_norm": 3.748015880584717, "learning_rate": 7.972602739726027e-07, "log_odds_chosen": 1.5378230810165405, "log_odds_ratio": -0.32432541251182556, "logits/chosen": 0.8869113326072693, "logits/rejected": 0.9063689708709717, "logps/chosen": -2.1155104637145996, "logps/rejected": -3.4658260345458984, "loss": 0.7948, "nll_loss": 0.7623318433761597, "rewards/accuracies": 0.875, "rewards/chosen": -0.21155104041099548, "rewards/margins": 0.1350315660238266, "rewards/rejected": -0.3465825915336609, "step": 1480 }, { "epoch": 4.054757015742642, "grad_norm": 2.3794941902160645, "learning_rate": 7.971232876712328e-07, "log_odds_chosen": 1.2506434917449951, "log_odds_ratio": -0.37855055928230286, "logits/chosen": 0.5776135921478271, "logits/rejected": 0.5224405527114868, "logps/chosen": -2.0059800148010254, "logps/rejected": -3.1679916381835938, "loss": 0.8902, "nll_loss": 0.8523762226104736, "rewards/accuracies": 0.875, "rewards/chosen": -0.20059800148010254, "rewards/margins": 0.11620116978883743, "rewards/rejected": -0.3167991638183594, "step": 1481 }, { "epoch": 4.057494866529774, "grad_norm": 4.390059471130371, "learning_rate": 7.96986301369863e-07, "log_odds_chosen": 1.428105115890503, "log_odds_ratio": -0.6037407517433167, "logits/chosen": 1.0894386768341064, "logits/rejected": 1.1522594690322876, "logps/chosen": -2.6979289054870605, "logps/rejected": -3.9914984703063965, "loss": 0.8855, "nll_loss": 0.82513827085495, "rewards/accuracies": 0.75, "rewards/chosen": -0.2697928845882416, "rewards/margins": 0.1293569952249527, "rewards/rejected": -0.3991498649120331, "step": 1482 }, { "epoch": 4.060232717316906, "grad_norm": 2.29549503326416, "learning_rate": 7.968493150684931e-07, "log_odds_chosen": 1.192030668258667, "log_odds_ratio": -0.3384789526462555, "logits/chosen": 0.6213323473930359, "logits/rejected": 0.5806416273117065, "logps/chosen": -1.981471300125122, "logps/rejected": -3.0828843116760254, "loss": 0.8089, "nll_loss": 0.7750810980796814, "rewards/accuracies": 1.0, "rewards/chosen": -0.19814711809158325, "rewards/margins": 0.11014130711555481, "rewards/rejected": -0.30828845500946045, "step": 1483 }, { "epoch": 4.0629705681040384, "grad_norm": 3.7133986949920654, "learning_rate": 7.967123287671232e-07, "log_odds_chosen": 0.8190214037895203, "log_odds_ratio": -0.38577765226364136, "logits/chosen": 0.9935140609741211, "logits/rejected": 0.9416008591651917, "logps/chosen": -1.9454658031463623, "logps/rejected": -2.650703191757202, "loss": 0.7799, "nll_loss": 0.7413548827171326, "rewards/accuracies": 1.0, "rewards/chosen": -0.19454658031463623, "rewards/margins": 0.07052373141050339, "rewards/rejected": -0.2650703191757202, "step": 1484 }, { "epoch": 4.0657084188911705, "grad_norm": 2.6171376705169678, "learning_rate": 7.965753424657534e-07, "log_odds_chosen": 1.5970149040222168, "log_odds_ratio": -0.2747209370136261, "logits/chosen": 1.0835877656936646, "logits/rejected": 1.06203293800354, "logps/chosen": -2.1848623752593994, "logps/rejected": -3.6733202934265137, "loss": 0.7417, "nll_loss": 0.7142601013183594, "rewards/accuracies": 0.875, "rewards/chosen": -0.2184862494468689, "rewards/margins": 0.14884577691555023, "rewards/rejected": -0.3673320412635803, "step": 1485 }, { "epoch": 4.068446269678303, "grad_norm": 2.2280259132385254, "learning_rate": 7.964383561643835e-07, "log_odds_chosen": 1.5647897720336914, "log_odds_ratio": -0.24385207891464233, "logits/chosen": 0.5157951712608337, "logits/rejected": 0.4263172745704651, "logps/chosen": -2.069077730178833, "logps/rejected": -3.5349364280700684, "loss": 0.8602, "nll_loss": 0.8358640670776367, "rewards/accuracies": 0.875, "rewards/chosen": -0.20690777897834778, "rewards/margins": 0.1465858519077301, "rewards/rejected": -0.35349366068840027, "step": 1486 }, { "epoch": 4.071184120465435, "grad_norm": 2.3257737159729004, "learning_rate": 7.963013698630137e-07, "log_odds_chosen": 2.339670419692993, "log_odds_ratio": -0.2745524048805237, "logits/chosen": 0.7378711104393005, "logits/rejected": 0.7564041018486023, "logps/chosen": -2.2359771728515625, "logps/rejected": -4.450338363647461, "loss": 0.8253, "nll_loss": 0.7978261709213257, "rewards/accuracies": 0.875, "rewards/chosen": -0.22359773516654968, "rewards/margins": 0.22143609821796417, "rewards/rejected": -0.44503381848335266, "step": 1487 }, { "epoch": 4.073921971252567, "grad_norm": 3.085272789001465, "learning_rate": 7.961643835616438e-07, "log_odds_chosen": 1.6798638105392456, "log_odds_ratio": -0.3114200234413147, "logits/chosen": 0.6014266610145569, "logits/rejected": 0.5977983474731445, "logps/chosen": -2.7682876586914062, "logps/rejected": -4.357955455780029, "loss": 0.8247, "nll_loss": 0.7935383319854736, "rewards/accuracies": 0.875, "rewards/chosen": -0.2768287658691406, "rewards/margins": 0.1589667797088623, "rewards/rejected": -0.43579554557800293, "step": 1488 }, { "epoch": 4.076659822039699, "grad_norm": 2.7085165977478027, "learning_rate": 7.960273972602739e-07, "log_odds_chosen": 1.7482755184173584, "log_odds_ratio": -0.4351893365383148, "logits/chosen": 0.8400802612304688, "logits/rejected": 0.8491172194480896, "logps/chosen": -1.890047311782837, "logps/rejected": -3.3137378692626953, "loss": 0.8477, "nll_loss": 0.8041502833366394, "rewards/accuracies": 0.875, "rewards/chosen": -0.18900473415851593, "rewards/margins": 0.1423690766096115, "rewards/rejected": -0.33137378096580505, "step": 1489 }, { "epoch": 4.079397672826831, "grad_norm": 2.317138910293579, "learning_rate": 7.958904109589041e-07, "log_odds_chosen": 1.139780879020691, "log_odds_ratio": -0.35847124457359314, "logits/chosen": 0.7134681940078735, "logits/rejected": 0.6796401739120483, "logps/chosen": -1.7378425598144531, "logps/rejected": -2.7866134643554688, "loss": 0.7622, "nll_loss": 0.7263760566711426, "rewards/accuracies": 1.0, "rewards/chosen": -0.1737842559814453, "rewards/margins": 0.10487709194421768, "rewards/rejected": -0.2786613404750824, "step": 1490 }, { "epoch": 4.082135523613963, "grad_norm": 3.860177993774414, "learning_rate": 7.957534246575343e-07, "log_odds_chosen": 1.4757583141326904, "log_odds_ratio": -0.38811159133911133, "logits/chosen": 0.9877445697784424, "logits/rejected": 0.9598071575164795, "logps/chosen": -2.270442247390747, "logps/rejected": -3.6419191360473633, "loss": 0.7355, "nll_loss": 0.696683943271637, "rewards/accuracies": 0.875, "rewards/chosen": -0.2270442247390747, "rewards/margins": 0.1371476799249649, "rewards/rejected": -0.3641918897628784, "step": 1491 }, { "epoch": 4.084873374401095, "grad_norm": 3.1407861709594727, "learning_rate": 7.956164383561643e-07, "log_odds_chosen": 1.0512620210647583, "log_odds_ratio": -0.5729607343673706, "logits/chosen": 0.9659186005592346, "logits/rejected": 0.9301905035972595, "logps/chosen": -2.875016689300537, "logps/rejected": -3.9033992290496826, "loss": 0.8813, "nll_loss": 0.8240252137184143, "rewards/accuracies": 0.5, "rewards/chosen": -0.28750166296958923, "rewards/margins": 0.10283827781677246, "rewards/rejected": -0.3903399407863617, "step": 1492 }, { "epoch": 4.087611225188227, "grad_norm": 2.1030073165893555, "learning_rate": 7.954794520547945e-07, "log_odds_chosen": 1.9137706756591797, "log_odds_ratio": -0.2562384605407715, "logits/chosen": 1.0176141262054443, "logits/rejected": 0.9853564500808716, "logps/chosen": -2.0454039573669434, "logps/rejected": -3.8256187438964844, "loss": 0.7784, "nll_loss": 0.7527639865875244, "rewards/accuracies": 1.0, "rewards/chosen": -0.2045404314994812, "rewards/margins": 0.17802144587039948, "rewards/rejected": -0.3825618624687195, "step": 1493 }, { "epoch": 4.090349075975359, "grad_norm": 2.924623489379883, "learning_rate": 7.953424657534247e-07, "log_odds_chosen": 1.7230284214019775, "log_odds_ratio": -0.3728410601615906, "logits/chosen": 0.4685328006744385, "logits/rejected": 0.38745903968811035, "logps/chosen": -2.0600099563598633, "logps/rejected": -3.6148788928985596, "loss": 0.8725, "nll_loss": 0.8352100253105164, "rewards/accuracies": 0.875, "rewards/chosen": -0.20600098371505737, "rewards/margins": 0.15548692643642426, "rewards/rejected": -0.36148789525032043, "step": 1494 }, { "epoch": 4.093086926762491, "grad_norm": 2.378854990005493, "learning_rate": 7.952054794520547e-07, "log_odds_chosen": 0.8298170566558838, "log_odds_ratio": -0.46325311064720154, "logits/chosen": 0.599149763584137, "logits/rejected": 0.5457509160041809, "logps/chosen": -2.000438690185547, "logps/rejected": -2.729733943939209, "loss": 0.8476, "nll_loss": 0.8013044595718384, "rewards/accuracies": 0.875, "rewards/chosen": -0.20004388689994812, "rewards/margins": 0.0729295164346695, "rewards/rejected": -0.2729733884334564, "step": 1495 }, { "epoch": 4.095824777549623, "grad_norm": 2.3026649951934814, "learning_rate": 7.950684931506849e-07, "log_odds_chosen": 2.5574488639831543, "log_odds_ratio": -0.18858875334262848, "logits/chosen": 0.8425688147544861, "logits/rejected": 0.8188588619232178, "logps/chosen": -2.034369707107544, "logps/rejected": -4.380476951599121, "loss": 0.7933, "nll_loss": 0.7744104266166687, "rewards/accuracies": 1.0, "rewards/chosen": -0.2034369707107544, "rewards/margins": 0.23461076617240906, "rewards/rejected": -0.43804770708084106, "step": 1496 }, { "epoch": 4.098562628336755, "grad_norm": 2.0845584869384766, "learning_rate": 7.94931506849315e-07, "log_odds_chosen": 1.6120549440383911, "log_odds_ratio": -0.3222416937351227, "logits/chosen": 0.8103764653205872, "logits/rejected": 0.8190179467201233, "logps/chosen": -1.8588886260986328, "logps/rejected": -3.328524589538574, "loss": 0.7434, "nll_loss": 0.7112258672714233, "rewards/accuracies": 0.875, "rewards/chosen": -0.18588887155056, "rewards/margins": 0.14696361124515533, "rewards/rejected": -0.33285248279571533, "step": 1497 }, { "epoch": 4.101300479123887, "grad_norm": 2.495954751968384, "learning_rate": 7.947945205479451e-07, "log_odds_chosen": 1.6050409078598022, "log_odds_ratio": -0.23539550602436066, "logits/chosen": 0.961597204208374, "logits/rejected": 0.9344723224639893, "logps/chosen": -1.954389214515686, "logps/rejected": -3.3767025470733643, "loss": 0.8164, "nll_loss": 0.7928866147994995, "rewards/accuracies": 1.0, "rewards/chosen": -0.1954389214515686, "rewards/margins": 0.1422313153743744, "rewards/rejected": -0.3376702666282654, "step": 1498 }, { "epoch": 4.1040383299110195, "grad_norm": 3.3622803688049316, "learning_rate": 7.946575342465753e-07, "log_odds_chosen": 0.8814275860786438, "log_odds_ratio": -0.5356226563453674, "logits/chosen": 0.8360925912857056, "logits/rejected": 0.8394980430603027, "logps/chosen": -2.223491907119751, "logps/rejected": -3.0033326148986816, "loss": 0.8276, "nll_loss": 0.7740757465362549, "rewards/accuracies": 0.875, "rewards/chosen": -0.22234921157360077, "rewards/margins": 0.07798407226800919, "rewards/rejected": -0.30033329129219055, "step": 1499 }, { "epoch": 4.1067761806981515, "grad_norm": 2.379375696182251, "learning_rate": 7.945205479452054e-07, "log_odds_chosen": 1.0547726154327393, "log_odds_ratio": -0.4605368971824646, "logits/chosen": 0.7054867744445801, "logits/rejected": 0.6652421951293945, "logps/chosen": -1.823603868484497, "logps/rejected": -2.785719394683838, "loss": 0.8788, "nll_loss": 0.8327606916427612, "rewards/accuracies": 0.625, "rewards/chosen": -0.18236038088798523, "rewards/margins": 0.09621156752109528, "rewards/rejected": -0.2785719633102417, "step": 1500 }, { "epoch": 4.1095140314852845, "grad_norm": 1.9188885688781738, "learning_rate": 7.943835616438356e-07, "log_odds_chosen": 1.484544038772583, "log_odds_ratio": -0.29034847021102905, "logits/chosen": 0.5743894577026367, "logits/rejected": 0.5039415955543518, "logps/chosen": -1.884436011314392, "logps/rejected": -3.2511250972747803, "loss": 0.9139, "nll_loss": 0.8848637342453003, "rewards/accuracies": 1.0, "rewards/chosen": -0.1884436160326004, "rewards/margins": 0.13666892051696777, "rewards/rejected": -0.32511255145072937, "step": 1501 }, { "epoch": 4.112251882272417, "grad_norm": 2.881481409072876, "learning_rate": 7.942465753424657e-07, "log_odds_chosen": 1.8647668361663818, "log_odds_ratio": -0.22837454080581665, "logits/chosen": 0.8047535419464111, "logits/rejected": 0.8046050071716309, "logps/chosen": -2.1575374603271484, "logps/rejected": -3.9016916751861572, "loss": 0.8505, "nll_loss": 0.8276852965354919, "rewards/accuracies": 1.0, "rewards/chosen": -0.2157537341117859, "rewards/margins": 0.1744154542684555, "rewards/rejected": -0.3901691734790802, "step": 1502 }, { "epoch": 4.114989733059549, "grad_norm": 3.0223593711853027, "learning_rate": 7.941095890410958e-07, "log_odds_chosen": 1.7897684574127197, "log_odds_ratio": -0.3903433382511139, "logits/chosen": 0.8304359912872314, "logits/rejected": 0.7606275677680969, "logps/chosen": -2.1029536724090576, "logps/rejected": -3.7940726280212402, "loss": 0.7907, "nll_loss": 0.7516605257987976, "rewards/accuracies": 0.875, "rewards/chosen": -0.21029537916183472, "rewards/margins": 0.16911189258098602, "rewards/rejected": -0.37940728664398193, "step": 1503 }, { "epoch": 4.117727583846681, "grad_norm": 2.1751708984375, "learning_rate": 7.93972602739726e-07, "log_odds_chosen": 1.6497268676757812, "log_odds_ratio": -0.2514837384223938, "logits/chosen": 0.3824165463447571, "logits/rejected": 0.2778605818748474, "logps/chosen": -1.689655065536499, "logps/rejected": -3.16351318359375, "loss": 0.8627, "nll_loss": 0.8375213146209717, "rewards/accuracies": 1.0, "rewards/chosen": -0.16896551847457886, "rewards/margins": 0.14738580584526062, "rewards/rejected": -0.3163512945175171, "step": 1504 }, { "epoch": 4.120465434633813, "grad_norm": 2.9986143112182617, "learning_rate": 7.938356164383561e-07, "log_odds_chosen": 1.1696991920471191, "log_odds_ratio": -0.380134642124176, "logits/chosen": 0.8978039026260376, "logits/rejected": 0.8838605880737305, "logps/chosen": -2.953763961791992, "logps/rejected": -4.053293228149414, "loss": 0.9669, "nll_loss": 0.9288762807846069, "rewards/accuracies": 0.875, "rewards/chosen": -0.29537639021873474, "rewards/margins": 0.10995295643806458, "rewards/rejected": -0.4053293764591217, "step": 1505 }, { "epoch": 4.123203285420945, "grad_norm": 2.5099337100982666, "learning_rate": 7.936986301369862e-07, "log_odds_chosen": 1.4619464874267578, "log_odds_ratio": -0.397350549697876, "logits/chosen": 0.8325111865997314, "logits/rejected": 0.8106116056442261, "logps/chosen": -2.777625322341919, "logps/rejected": -4.149011611938477, "loss": 0.7993, "nll_loss": 0.7596107721328735, "rewards/accuracies": 0.875, "rewards/chosen": -0.2777625322341919, "rewards/margins": 0.13713869452476501, "rewards/rejected": -0.4149012267589569, "step": 1506 }, { "epoch": 4.125941136208077, "grad_norm": 2.6391499042510986, "learning_rate": 7.935616438356164e-07, "log_odds_chosen": 1.8789482116699219, "log_odds_ratio": -0.33163899183273315, "logits/chosen": 0.9559903740882874, "logits/rejected": 0.9901821613311768, "logps/chosen": -2.4985299110412598, "logps/rejected": -4.271190166473389, "loss": 0.7709, "nll_loss": 0.7377756237983704, "rewards/accuracies": 0.875, "rewards/chosen": -0.2498529851436615, "rewards/margins": 0.17726601660251617, "rewards/rejected": -0.42711901664733887, "step": 1507 }, { "epoch": 4.128678986995209, "grad_norm": 3.402449369430542, "learning_rate": 7.934246575342466e-07, "log_odds_chosen": 1.0660933256149292, "log_odds_ratio": -0.45678311586380005, "logits/chosen": 0.6302931308746338, "logits/rejected": 0.631001353263855, "logps/chosen": -2.4820942878723145, "logps/rejected": -3.4792990684509277, "loss": 0.8613, "nll_loss": 0.8156455755233765, "rewards/accuracies": 0.875, "rewards/chosen": -0.24820944666862488, "rewards/margins": 0.09972050040960312, "rewards/rejected": -0.3479299247264862, "step": 1508 }, { "epoch": 4.131416837782341, "grad_norm": 2.367357015609741, "learning_rate": 7.932876712328766e-07, "log_odds_chosen": 1.554811716079712, "log_odds_ratio": -0.3139258325099945, "logits/chosen": 0.8577045202255249, "logits/rejected": 0.8250314593315125, "logps/chosen": -2.1096343994140625, "logps/rejected": -3.5519609451293945, "loss": 0.8115, "nll_loss": 0.7801393866539001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2109634429216385, "rewards/margins": 0.14423266053199768, "rewards/rejected": -0.355196088552475, "step": 1509 }, { "epoch": 4.134154688569473, "grad_norm": 2.9478063583374023, "learning_rate": 7.931506849315068e-07, "log_odds_chosen": 0.02049356698989868, "log_odds_ratio": -0.9223414659500122, "logits/chosen": 0.8240565061569214, "logits/rejected": 0.7737915515899658, "logps/chosen": -2.647122383117676, "logps/rejected": -2.6122703552246094, "loss": 0.8815, "nll_loss": 0.7892788052558899, "rewards/accuracies": 0.625, "rewards/chosen": -0.26471224427223206, "rewards/margins": -0.0034852158278226852, "rewards/rejected": -0.261227011680603, "step": 1510 }, { "epoch": 4.136892539356605, "grad_norm": 2.1043970584869385, "learning_rate": 7.930136986301369e-07, "log_odds_chosen": 1.4523396492004395, "log_odds_ratio": -0.4157969057559967, "logits/chosen": 0.83272385597229, "logits/rejected": 0.8226600885391235, "logps/chosen": -1.8187196254730225, "logps/rejected": -3.1399178504943848, "loss": 0.814, "nll_loss": 0.7723931074142456, "rewards/accuracies": 0.875, "rewards/chosen": -0.18187196552753448, "rewards/margins": 0.132119819521904, "rewards/rejected": -0.3139917850494385, "step": 1511 }, { "epoch": 4.139630390143737, "grad_norm": 2.522892713546753, "learning_rate": 7.92876712328767e-07, "log_odds_chosen": 0.9125666618347168, "log_odds_ratio": -0.38789495825767517, "logits/chosen": 0.6894996166229248, "logits/rejected": 0.6526615023612976, "logps/chosen": -2.115497350692749, "logps/rejected": -2.940197706222534, "loss": 0.8231, "nll_loss": 0.7843473553657532, "rewards/accuracies": 1.0, "rewards/chosen": -0.21154972910881042, "rewards/margins": 0.08247002959251404, "rewards/rejected": -0.29401978850364685, "step": 1512 }, { "epoch": 4.142368240930869, "grad_norm": 2.7046172618865967, "learning_rate": 7.927397260273972e-07, "log_odds_chosen": 1.3959680795669556, "log_odds_ratio": -0.4824327230453491, "logits/chosen": 0.8086972236633301, "logits/rejected": 0.7862504720687866, "logps/chosen": -3.046311140060425, "logps/rejected": -4.414156436920166, "loss": 0.9895, "nll_loss": 0.9412714838981628, "rewards/accuracies": 0.625, "rewards/chosen": -0.3046311140060425, "rewards/margins": 0.13678455352783203, "rewards/rejected": -0.4414156973361969, "step": 1513 }, { "epoch": 4.145106091718001, "grad_norm": 3.2884247303009033, "learning_rate": 7.926027397260273e-07, "log_odds_chosen": 1.5617287158966064, "log_odds_ratio": -0.4245319664478302, "logits/chosen": 1.073714256286621, "logits/rejected": 1.0979174375534058, "logps/chosen": -2.762795925140381, "logps/rejected": -4.269346237182617, "loss": 0.8067, "nll_loss": 0.7641968131065369, "rewards/accuracies": 0.625, "rewards/chosen": -0.27627962827682495, "rewards/margins": 0.15065503120422363, "rewards/rejected": -0.4269346296787262, "step": 1514 }, { "epoch": 4.147843942505133, "grad_norm": 4.715481281280518, "learning_rate": 7.924657534246575e-07, "log_odds_chosen": 0.7982827425003052, "log_odds_ratio": -0.8123215436935425, "logits/chosen": 0.634102463722229, "logits/rejected": 0.5644322037696838, "logps/chosen": -3.250838279724121, "logps/rejected": -3.9232020378112793, "loss": 0.8601, "nll_loss": 0.7788596153259277, "rewards/accuracies": 0.75, "rewards/chosen": -0.32508385181427, "rewards/margins": 0.06723637133836746, "rewards/rejected": -0.3923202455043793, "step": 1515 }, { "epoch": 4.1505817932922655, "grad_norm": 3.2503044605255127, "learning_rate": 7.923287671232876e-07, "log_odds_chosen": 0.4780806303024292, "log_odds_ratio": -0.5083436369895935, "logits/chosen": 0.7590008974075317, "logits/rejected": 0.6666803956031799, "logps/chosen": -1.8564438819885254, "logps/rejected": -2.253753185272217, "loss": 0.8793, "nll_loss": 0.8284513354301453, "rewards/accuracies": 0.75, "rewards/chosen": -0.18564440310001373, "rewards/margins": 0.039730921387672424, "rewards/rejected": -0.22537530958652496, "step": 1516 }, { "epoch": 4.153319644079398, "grad_norm": 5.700675010681152, "learning_rate": 7.921917808219177e-07, "log_odds_chosen": 0.37902700901031494, "log_odds_ratio": -0.8139445185661316, "logits/chosen": 0.7890477180480957, "logits/rejected": 0.7806075811386108, "logps/chosen": -2.553642511367798, "logps/rejected": -2.9104738235473633, "loss": 0.868, "nll_loss": 0.7866445779800415, "rewards/accuracies": 0.625, "rewards/chosen": -0.2553642690181732, "rewards/margins": 0.03568312153220177, "rewards/rejected": -0.2910473942756653, "step": 1517 }, { "epoch": 4.15605749486653, "grad_norm": 3.1607584953308105, "learning_rate": 7.920547945205479e-07, "log_odds_chosen": 1.005104660987854, "log_odds_ratio": -0.4218384027481079, "logits/chosen": 0.93177330493927, "logits/rejected": 0.9829505681991577, "logps/chosen": -2.629061222076416, "logps/rejected": -3.6171059608459473, "loss": 0.7982, "nll_loss": 0.7560279965400696, "rewards/accuracies": 0.75, "rewards/chosen": -0.26290610432624817, "rewards/margins": 0.09880447387695312, "rewards/rejected": -0.3617106080055237, "step": 1518 }, { "epoch": 4.158795345653662, "grad_norm": 3.4806861877441406, "learning_rate": 7.91917808219178e-07, "log_odds_chosen": 0.8047586679458618, "log_odds_ratio": -0.5778363347053528, "logits/chosen": 0.8623551726341248, "logits/rejected": 0.8404382467269897, "logps/chosen": -1.8024063110351562, "logps/rejected": -2.519528388977051, "loss": 0.7878, "nll_loss": 0.7299864888191223, "rewards/accuracies": 0.875, "rewards/chosen": -0.18024063110351562, "rewards/margins": 0.07171221077442169, "rewards/rejected": -0.2519528567790985, "step": 1519 }, { "epoch": 4.161533196440794, "grad_norm": 2.3158915042877197, "learning_rate": 7.917808219178081e-07, "log_odds_chosen": 2.2214877605438232, "log_odds_ratio": -0.15922144055366516, "logits/chosen": 1.1597788333892822, "logits/rejected": 1.1853232383728027, "logps/chosen": -1.7437965869903564, "logps/rejected": -3.7826812267303467, "loss": 0.7407, "nll_loss": 0.7247947454452515, "rewards/accuracies": 1.0, "rewards/chosen": -0.17437967658042908, "rewards/margins": 0.20388846099376678, "rewards/rejected": -0.37826815247535706, "step": 1520 }, { "epoch": 4.164271047227926, "grad_norm": 3.6930181980133057, "learning_rate": 7.916438356164383e-07, "log_odds_chosen": 0.7507137656211853, "log_odds_ratio": -0.509931743144989, "logits/chosen": 0.7761152386665344, "logits/rejected": 0.7963041067123413, "logps/chosen": -2.287714719772339, "logps/rejected": -2.9732272624969482, "loss": 0.7996, "nll_loss": 0.7485954761505127, "rewards/accuracies": 0.75, "rewards/chosen": -0.22877147793769836, "rewards/margins": 0.06855125725269318, "rewards/rejected": -0.29732275009155273, "step": 1521 }, { "epoch": 4.167008898015058, "grad_norm": 2.6161253452301025, "learning_rate": 7.915068493150685e-07, "log_odds_chosen": 0.8474071025848389, "log_odds_ratio": -0.42875462770462036, "logits/chosen": 0.7699398994445801, "logits/rejected": 0.7967450022697449, "logps/chosen": -1.8973100185394287, "logps/rejected": -2.645350933074951, "loss": 0.7306, "nll_loss": 0.6877274513244629, "rewards/accuracies": 0.75, "rewards/chosen": -0.18973100185394287, "rewards/margins": 0.07480411976575851, "rewards/rejected": -0.264535129070282, "step": 1522 }, { "epoch": 4.16974674880219, "grad_norm": 2.823917865753174, "learning_rate": 7.913698630136985e-07, "log_odds_chosen": 0.8899355530738831, "log_odds_ratio": -0.4223979413509369, "logits/chosen": 0.8775684833526611, "logits/rejected": 0.8739883899688721, "logps/chosen": -2.006037473678589, "logps/rejected": -2.828603982925415, "loss": 0.8256, "nll_loss": 0.7833245992660522, "rewards/accuracies": 0.875, "rewards/chosen": -0.20060375332832336, "rewards/margins": 0.08225664496421814, "rewards/rejected": -0.2828603982925415, "step": 1523 }, { "epoch": 4.172484599589322, "grad_norm": 2.1205694675445557, "learning_rate": 7.912328767123287e-07, "log_odds_chosen": 1.7299599647521973, "log_odds_ratio": -0.2229175567626953, "logits/chosen": 0.740503191947937, "logits/rejected": 0.7420488595962524, "logps/chosen": -1.9757453203201294, "logps/rejected": -3.572317361831665, "loss": 0.7569, "nll_loss": 0.7345819473266602, "rewards/accuracies": 1.0, "rewards/chosen": -0.19757454097270966, "rewards/margins": 0.15965721011161804, "rewards/rejected": -0.3572317659854889, "step": 1524 }, { "epoch": 4.175222450376454, "grad_norm": 2.338184356689453, "learning_rate": 7.91095890410959e-07, "log_odds_chosen": 1.7101624011993408, "log_odds_ratio": -0.3298743963241577, "logits/chosen": 0.944015383720398, "logits/rejected": 0.8991657495498657, "logps/chosen": -2.205732583999634, "logps/rejected": -3.8341288566589355, "loss": 0.8456, "nll_loss": 0.8125841617584229, "rewards/accuracies": 0.75, "rewards/chosen": -0.22057326138019562, "rewards/margins": 0.16283965110778809, "rewards/rejected": -0.3834128975868225, "step": 1525 }, { "epoch": 4.177960301163586, "grad_norm": 2.4089949131011963, "learning_rate": 7.909589041095889e-07, "log_odds_chosen": 1.55234956741333, "log_odds_ratio": -0.2706557512283325, "logits/chosen": 0.6756107807159424, "logits/rejected": 0.6346938014030457, "logps/chosen": -1.5595645904541016, "logps/rejected": -2.897653579711914, "loss": 0.7487, "nll_loss": 0.7216222882270813, "rewards/accuracies": 1.0, "rewards/chosen": -0.1559564620256424, "rewards/margins": 0.133808895945549, "rewards/rejected": -0.2897653579711914, "step": 1526 }, { "epoch": 4.180698151950718, "grad_norm": 3.1348326206207275, "learning_rate": 7.908219178082191e-07, "log_odds_chosen": 0.7646065950393677, "log_odds_ratio": -0.5235384702682495, "logits/chosen": 0.6035683155059814, "logits/rejected": 0.5911623239517212, "logps/chosen": -2.4315788745880127, "logps/rejected": -3.133204936981201, "loss": 0.8071, "nll_loss": 0.7547693252563477, "rewards/accuracies": 0.75, "rewards/chosen": -0.24315787851810455, "rewards/margins": 0.07016260176897049, "rewards/rejected": -0.31332048773765564, "step": 1527 }, { "epoch": 4.183436002737851, "grad_norm": 3.0345287322998047, "learning_rate": 7.906849315068492e-07, "log_odds_chosen": 2.029387950897217, "log_odds_ratio": -0.3914983868598938, "logits/chosen": 0.9428896307945251, "logits/rejected": 0.8924345970153809, "logps/chosen": -2.61198353767395, "logps/rejected": -4.501829624176025, "loss": 0.7564, "nll_loss": 0.7172353267669678, "rewards/accuracies": 0.875, "rewards/chosen": -0.26119834184646606, "rewards/margins": 0.18898457288742065, "rewards/rejected": -0.4501829445362091, "step": 1528 }, { "epoch": 4.186173853524983, "grad_norm": 2.549116373062134, "learning_rate": 7.905479452054795e-07, "log_odds_chosen": 1.1049323081970215, "log_odds_ratio": -0.4095882475376129, "logits/chosen": 0.8341346979141235, "logits/rejected": 0.7555016279220581, "logps/chosen": -1.8513195514678955, "logps/rejected": -2.8161466121673584, "loss": 0.9432, "nll_loss": 0.9022316336631775, "rewards/accuracies": 0.875, "rewards/chosen": -0.1851319670677185, "rewards/margins": 0.09648269414901733, "rewards/rejected": -0.28161466121673584, "step": 1529 }, { "epoch": 4.188911704312115, "grad_norm": 2.077970027923584, "learning_rate": 7.904109589041096e-07, "log_odds_chosen": 1.829871654510498, "log_odds_ratio": -0.29998499155044556, "logits/chosen": 0.910114049911499, "logits/rejected": 0.8136834502220154, "logps/chosen": -2.153902292251587, "logps/rejected": -3.867452383041382, "loss": 0.7935, "nll_loss": 0.7634877562522888, "rewards/accuracies": 1.0, "rewards/chosen": -0.21539025008678436, "rewards/margins": 0.1713549941778183, "rewards/rejected": -0.38674527406692505, "step": 1530 }, { "epoch": 4.191649555099247, "grad_norm": 3.6836891174316406, "learning_rate": 7.902739726027396e-07, "log_odds_chosen": 0.15286913514137268, "log_odds_ratio": -0.900810718536377, "logits/chosen": 0.7713223099708557, "logits/rejected": 0.803268313407898, "logps/chosen": -2.57827091217041, "logps/rejected": -2.7125043869018555, "loss": 0.8698, "nll_loss": 0.7797638177871704, "rewards/accuracies": 0.625, "rewards/chosen": -0.25782710313796997, "rewards/margins": 0.013423370197415352, "rewards/rejected": -0.27125048637390137, "step": 1531 }, { "epoch": 4.1943874058863795, "grad_norm": 2.404726028442383, "learning_rate": 7.901369863013699e-07, "log_odds_chosen": 0.8428884744644165, "log_odds_ratio": -0.444875031709671, "logits/chosen": 0.641656756401062, "logits/rejected": 0.574004590511322, "logps/chosen": -1.96724534034729, "logps/rejected": -2.719571590423584, "loss": 0.8177, "nll_loss": 0.7732414603233337, "rewards/accuracies": 0.875, "rewards/chosen": -0.196724534034729, "rewards/margins": 0.0752326250076294, "rewards/rejected": -0.2719571590423584, "step": 1532 }, { "epoch": 4.1971252566735116, "grad_norm": 3.4283173084259033, "learning_rate": 7.9e-07, "log_odds_chosen": 1.8713483810424805, "log_odds_ratio": -0.32831186056137085, "logits/chosen": 0.6929155588150024, "logits/rejected": 0.592466413974762, "logps/chosen": -2.103501796722412, "logps/rejected": -3.8695030212402344, "loss": 0.8414, "nll_loss": 0.8085391521453857, "rewards/accuracies": 0.875, "rewards/chosen": -0.2103501856327057, "rewards/margins": 0.1766001284122467, "rewards/rejected": -0.3869503140449524, "step": 1533 }, { "epoch": 4.199863107460644, "grad_norm": 4.118930816650391, "learning_rate": 7.8986301369863e-07, "log_odds_chosen": -0.24767106771469116, "log_odds_ratio": -1.000267744064331, "logits/chosen": 0.719660222530365, "logits/rejected": 0.7108942270278931, "logps/chosen": -2.968255043029785, "logps/rejected": -2.6818127632141113, "loss": 0.9019, "nll_loss": 0.8018543720245361, "rewards/accuracies": 0.75, "rewards/chosen": -0.29682549834251404, "rewards/margins": -0.028644206002354622, "rewards/rejected": -0.26818132400512695, "step": 1534 }, { "epoch": 4.202600958247776, "grad_norm": 2.201918363571167, "learning_rate": 7.897260273972603e-07, "log_odds_chosen": 2.331449508666992, "log_odds_ratio": -0.21329939365386963, "logits/chosen": 0.5980441570281982, "logits/rejected": 0.6191981434822083, "logps/chosen": -1.6153502464294434, "logps/rejected": -3.7819302082061768, "loss": 0.8554, "nll_loss": 0.8340432047843933, "rewards/accuracies": 0.875, "rewards/chosen": -0.16153502464294434, "rewards/margins": 0.21665801107883453, "rewards/rejected": -0.3781930208206177, "step": 1535 }, { "epoch": 4.205338809034908, "grad_norm": 3.2019569873809814, "learning_rate": 7.895890410958905e-07, "log_odds_chosen": 2.6462016105651855, "log_odds_ratio": -0.4432331919670105, "logits/chosen": 0.9692527055740356, "logits/rejected": 0.9965558648109436, "logps/chosen": -2.302340507507324, "logps/rejected": -4.865743160247803, "loss": 0.8559, "nll_loss": 0.8115988969802856, "rewards/accuracies": 0.75, "rewards/chosen": -0.2302340567111969, "rewards/margins": 0.25634026527404785, "rewards/rejected": -0.48657429218292236, "step": 1536 }, { "epoch": 4.20807665982204, "grad_norm": 3.3831307888031006, "learning_rate": 7.894520547945205e-07, "log_odds_chosen": 1.1767247915267944, "log_odds_ratio": -0.7446801066398621, "logits/chosen": 0.7842904329299927, "logits/rejected": 0.7717090249061584, "logps/chosen": -3.156094551086426, "logps/rejected": -4.338468074798584, "loss": 0.8491, "nll_loss": 0.7746679782867432, "rewards/accuracies": 0.75, "rewards/chosen": -0.3156094551086426, "rewards/margins": 0.11823739856481552, "rewards/rejected": -0.4338468313217163, "step": 1537 }, { "epoch": 4.210814510609172, "grad_norm": 1.9997409582138062, "learning_rate": 7.893150684931507e-07, "log_odds_chosen": 2.185401678085327, "log_odds_ratio": -0.2124462127685547, "logits/chosen": 0.7596871256828308, "logits/rejected": 0.7157699465751648, "logps/chosen": -1.5559356212615967, "logps/rejected": -3.5533533096313477, "loss": 0.7335, "nll_loss": 0.7122291326522827, "rewards/accuracies": 1.0, "rewards/chosen": -0.15559357404708862, "rewards/margins": 0.19974176585674286, "rewards/rejected": -0.3553353548049927, "step": 1538 }, { "epoch": 4.213552361396304, "grad_norm": 4.548766136169434, "learning_rate": 7.891780821917809e-07, "log_odds_chosen": 0.22439825534820557, "log_odds_ratio": -0.8149523735046387, "logits/chosen": 0.7805457711219788, "logits/rejected": 0.7430967092514038, "logps/chosen": -1.7874596118927002, "logps/rejected": -1.8949759006500244, "loss": 0.8277, "nll_loss": 0.7462271451950073, "rewards/accuracies": 0.875, "rewards/chosen": -0.17874595522880554, "rewards/margins": 0.010751649737358093, "rewards/rejected": -0.18949760496616364, "step": 1539 }, { "epoch": 4.216290212183436, "grad_norm": 2.0242695808410645, "learning_rate": 7.890410958904109e-07, "log_odds_chosen": 3.0308170318603516, "log_odds_ratio": -0.26509982347488403, "logits/chosen": 0.7484292984008789, "logits/rejected": 0.7295246124267578, "logps/chosen": -2.0886125564575195, "logps/rejected": -5.025418281555176, "loss": 0.8251, "nll_loss": 0.7986069917678833, "rewards/accuracies": 0.75, "rewards/chosen": -0.20886126160621643, "rewards/margins": 0.2936805486679077, "rewards/rejected": -0.5025418400764465, "step": 1540 }, { "epoch": 4.219028062970568, "grad_norm": 3.9575862884521484, "learning_rate": 7.889041095890411e-07, "log_odds_chosen": 1.0294811725616455, "log_odds_ratio": -0.36953288316726685, "logits/chosen": 0.7765827775001526, "logits/rejected": 0.712160587310791, "logps/chosen": -1.9163060188293457, "logps/rejected": -2.8608877658843994, "loss": 0.8429, "nll_loss": 0.8059187531471252, "rewards/accuracies": 0.875, "rewards/chosen": -0.19163060188293457, "rewards/margins": 0.09445817023515701, "rewards/rejected": -0.286088764667511, "step": 1541 }, { "epoch": 4.2217659137577, "grad_norm": 2.1497738361358643, "learning_rate": 7.887671232876712e-07, "log_odds_chosen": 1.5390448570251465, "log_odds_ratio": -0.3243323564529419, "logits/chosen": 0.840545654296875, "logits/rejected": 0.8039968013763428, "logps/chosen": -2.049379825592041, "logps/rejected": -3.495589256286621, "loss": 0.8046, "nll_loss": 0.7721693515777588, "rewards/accuracies": 0.875, "rewards/chosen": -0.20493799448013306, "rewards/margins": 0.14462094008922577, "rewards/rejected": -0.34955894947052, "step": 1542 }, { "epoch": 4.224503764544832, "grad_norm": 2.405975818634033, "learning_rate": 7.886301369863014e-07, "log_odds_chosen": 1.5352225303649902, "log_odds_ratio": -0.3980244994163513, "logits/chosen": 0.960407018661499, "logits/rejected": 1.0180059671401978, "logps/chosen": -1.8853274583816528, "logps/rejected": -3.2658817768096924, "loss": 0.7531, "nll_loss": 0.713291585445404, "rewards/accuracies": 0.75, "rewards/chosen": -0.188532754778862, "rewards/margins": 0.13805542886257172, "rewards/rejected": -0.32658815383911133, "step": 1543 }, { "epoch": 4.227241615331964, "grad_norm": 2.616257905960083, "learning_rate": 7.884931506849315e-07, "log_odds_chosen": 1.4741621017456055, "log_odds_ratio": -0.3856458365917206, "logits/chosen": 0.8289133906364441, "logits/rejected": 0.878119707107544, "logps/chosen": -2.072939872741699, "logps/rejected": -3.385098457336426, "loss": 0.7317, "nll_loss": 0.6931045055389404, "rewards/accuracies": 0.875, "rewards/chosen": -0.20729398727416992, "rewards/margins": 0.1312158703804016, "rewards/rejected": -0.33850985765457153, "step": 1544 }, { "epoch": 4.229979466119096, "grad_norm": 3.865355968475342, "learning_rate": 7.883561643835616e-07, "log_odds_chosen": 1.6248034238815308, "log_odds_ratio": -0.5144687294960022, "logits/chosen": 0.9807171821594238, "logits/rejected": 0.9969165325164795, "logps/chosen": -2.7340118885040283, "logps/rejected": -4.187197685241699, "loss": 0.8154, "nll_loss": 0.7639418244361877, "rewards/accuracies": 0.75, "rewards/chosen": -0.2734012007713318, "rewards/margins": 0.14531858265399933, "rewards/rejected": -0.4187197685241699, "step": 1545 }, { "epoch": 4.232717316906228, "grad_norm": 2.2184245586395264, "learning_rate": 7.882191780821918e-07, "log_odds_chosen": 1.1047918796539307, "log_odds_ratio": -0.32740694284439087, "logits/chosen": 0.6427160501480103, "logits/rejected": 0.6547499895095825, "logps/chosen": -1.8562027215957642, "logps/rejected": -2.847172260284424, "loss": 0.719, "nll_loss": 0.6862924098968506, "rewards/accuracies": 0.875, "rewards/chosen": -0.1856202781200409, "rewards/margins": 0.09909695386886597, "rewards/rejected": -0.28471723198890686, "step": 1546 }, { "epoch": 4.2354551676933605, "grad_norm": 4.542616844177246, "learning_rate": 7.880821917808219e-07, "log_odds_chosen": 0.9869286417961121, "log_odds_ratio": -0.5387017726898193, "logits/chosen": 0.7627196311950684, "logits/rejected": 0.7512261867523193, "logps/chosen": -3.1452744007110596, "logps/rejected": -3.9896020889282227, "loss": 0.9199, "nll_loss": 0.8660413026809692, "rewards/accuracies": 0.75, "rewards/chosen": -0.3145274519920349, "rewards/margins": 0.08443275839090347, "rewards/rejected": -0.3989602327346802, "step": 1547 }, { "epoch": 4.238193018480493, "grad_norm": 4.090450763702393, "learning_rate": 7.87945205479452e-07, "log_odds_chosen": 1.7858781814575195, "log_odds_ratio": -0.2664962112903595, "logits/chosen": 0.7537735104560852, "logits/rejected": 0.6169293522834778, "logps/chosen": -2.2943339347839355, "logps/rejected": -3.9676947593688965, "loss": 0.8334, "nll_loss": 0.8067216873168945, "rewards/accuracies": 1.0, "rewards/chosen": -0.22943341732025146, "rewards/margins": 0.16733606159687042, "rewards/rejected": -0.3967694938182831, "step": 1548 }, { "epoch": 4.240930869267625, "grad_norm": 2.1445977687835693, "learning_rate": 7.878082191780822e-07, "log_odds_chosen": 1.6122286319732666, "log_odds_ratio": -0.44657227396965027, "logits/chosen": 0.6354455351829529, "logits/rejected": 0.5747888088226318, "logps/chosen": -1.7543818950653076, "logps/rejected": -3.2899317741394043, "loss": 0.8667, "nll_loss": 0.8220126628875732, "rewards/accuracies": 0.625, "rewards/chosen": -0.17543819546699524, "rewards/margins": 0.15355496108531952, "rewards/rejected": -0.32899314165115356, "step": 1549 }, { "epoch": 4.243668720054757, "grad_norm": 2.1296730041503906, "learning_rate": 7.876712328767124e-07, "log_odds_chosen": 2.640871286392212, "log_odds_ratio": -0.13985475897789001, "logits/chosen": 0.8739178776741028, "logits/rejected": 0.8787574768066406, "logps/chosen": -1.9627012014389038, "logps/rejected": -4.450357913970947, "loss": 0.7774, "nll_loss": 0.7633690237998962, "rewards/accuracies": 1.0, "rewards/chosen": -0.1962701380252838, "rewards/margins": 0.24876564741134644, "rewards/rejected": -0.44503575563430786, "step": 1550 }, { "epoch": 4.246406570841889, "grad_norm": 2.926454544067383, "learning_rate": 7.875342465753424e-07, "log_odds_chosen": 1.5239537954330444, "log_odds_ratio": -0.3778025805950165, "logits/chosen": 0.5856420993804932, "logits/rejected": 0.5662418603897095, "logps/chosen": -2.008363723754883, "logps/rejected": -3.4024667739868164, "loss": 0.7865, "nll_loss": 0.7487487196922302, "rewards/accuracies": 0.875, "rewards/chosen": -0.20083636045455933, "rewards/margins": 0.13941031694412231, "rewards/rejected": -0.34024667739868164, "step": 1551 }, { "epoch": 4.249144421629021, "grad_norm": 2.0338923931121826, "learning_rate": 7.873972602739726e-07, "log_odds_chosen": 1.456263780593872, "log_odds_ratio": -0.29708051681518555, "logits/chosen": 0.9009864926338196, "logits/rejected": 0.8812585473060608, "logps/chosen": -1.901015281677246, "logps/rejected": -3.223289966583252, "loss": 0.7514, "nll_loss": 0.7216846346855164, "rewards/accuracies": 1.0, "rewards/chosen": -0.1901015341281891, "rewards/margins": 0.13222746551036835, "rewards/rejected": -0.32232898473739624, "step": 1552 }, { "epoch": 4.251882272416153, "grad_norm": 4.08899450302124, "learning_rate": 7.872602739726028e-07, "log_odds_chosen": 0.9827497005462646, "log_odds_ratio": -0.581581711769104, "logits/chosen": 0.8272396922111511, "logits/rejected": 0.7933998703956604, "logps/chosen": -2.4489331245422363, "logps/rejected": -3.293991804122925, "loss": 0.8449, "nll_loss": 0.7867016792297363, "rewards/accuracies": 0.875, "rewards/chosen": -0.24489331245422363, "rewards/margins": 0.08450589329004288, "rewards/rejected": -0.3293991982936859, "step": 1553 }, { "epoch": 4.254620123203285, "grad_norm": 2.392883539199829, "learning_rate": 7.871232876712328e-07, "log_odds_chosen": 1.508588433265686, "log_odds_ratio": -0.48491746187210083, "logits/chosen": 0.6889557838439941, "logits/rejected": 0.6888813972473145, "logps/chosen": -2.1593589782714844, "logps/rejected": -3.5864644050598145, "loss": 0.8105, "nll_loss": 0.7620054483413696, "rewards/accuracies": 0.875, "rewards/chosen": -0.21593591570854187, "rewards/margins": 0.14271050691604614, "rewards/rejected": -0.3586464524269104, "step": 1554 }, { "epoch": 4.257357973990418, "grad_norm": 3.3329997062683105, "learning_rate": 7.86986301369863e-07, "log_odds_chosen": 1.3082504272460938, "log_odds_ratio": -0.7567225098609924, "logits/chosen": 0.8264745473861694, "logits/rejected": 0.8788110017776489, "logps/chosen": -2.396481990814209, "logps/rejected": -3.564478635787964, "loss": 0.833, "nll_loss": 0.7573132514953613, "rewards/accuracies": 0.625, "rewards/chosen": -0.2396482229232788, "rewards/margins": 0.11679966747760773, "rewards/rejected": -0.35644787549972534, "step": 1555 }, { "epoch": 4.26009582477755, "grad_norm": 3.102959156036377, "learning_rate": 7.868493150684932e-07, "log_odds_chosen": 1.011926531791687, "log_odds_ratio": -0.42957955598831177, "logits/chosen": 0.7197214961051941, "logits/rejected": 0.6245328187942505, "logps/chosen": -1.9902056455612183, "logps/rejected": -2.9176387786865234, "loss": 0.8741, "nll_loss": 0.8311467170715332, "rewards/accuracies": 0.75, "rewards/chosen": -0.19902056455612183, "rewards/margins": 0.09274332225322723, "rewards/rejected": -0.29176387190818787, "step": 1556 }, { "epoch": 4.262833675564682, "grad_norm": 3.2545950412750244, "learning_rate": 7.867123287671233e-07, "log_odds_chosen": 0.5716254711151123, "log_odds_ratio": -0.5311245918273926, "logits/chosen": 0.5184633135795593, "logits/rejected": 0.4441002607345581, "logps/chosen": -2.1763529777526855, "logps/rejected": -2.6392362117767334, "loss": 0.7961, "nll_loss": 0.7430030107498169, "rewards/accuracies": 0.75, "rewards/chosen": -0.21763530373573303, "rewards/margins": 0.04628835618495941, "rewards/rejected": -0.26392364501953125, "step": 1557 }, { "epoch": 4.265571526351814, "grad_norm": 2.222024917602539, "learning_rate": 7.865753424657534e-07, "log_odds_chosen": 2.1443018913269043, "log_odds_ratio": -0.2523316740989685, "logits/chosen": 0.6867661476135254, "logits/rejected": 0.5770875811576843, "logps/chosen": -1.3994817733764648, "logps/rejected": -3.355095386505127, "loss": 0.8607, "nll_loss": 0.8354600667953491, "rewards/accuracies": 1.0, "rewards/chosen": -0.13994817435741425, "rewards/margins": 0.19556137919425964, "rewards/rejected": -0.3355095386505127, "step": 1558 }, { "epoch": 4.268309377138946, "grad_norm": 2.6979928016662598, "learning_rate": 7.864383561643835e-07, "log_odds_chosen": 3.1566619873046875, "log_odds_ratio": -0.11523707956075668, "logits/chosen": 0.9669366478919983, "logits/rejected": 0.9481686353683472, "logps/chosen": -2.5281834602355957, "logps/rejected": -5.566220283508301, "loss": 0.8202, "nll_loss": 0.8087257146835327, "rewards/accuracies": 1.0, "rewards/chosen": -0.25281834602355957, "rewards/margins": 0.3038036823272705, "rewards/rejected": -0.5566220283508301, "step": 1559 }, { "epoch": 4.271047227926078, "grad_norm": 2.4871394634246826, "learning_rate": 7.863013698630137e-07, "log_odds_chosen": 1.1907713413238525, "log_odds_ratio": -0.42215225100517273, "logits/chosen": 0.8561850786209106, "logits/rejected": 0.7887632846832275, "logps/chosen": -2.065441131591797, "logps/rejected": -3.1010828018188477, "loss": 0.8672, "nll_loss": 0.8250048160552979, "rewards/accuracies": 0.875, "rewards/chosen": -0.20654411613941193, "rewards/margins": 0.10356418788433075, "rewards/rejected": -0.3101083040237427, "step": 1560 }, { "epoch": 4.27378507871321, "grad_norm": 2.4846839904785156, "learning_rate": 7.861643835616438e-07, "log_odds_chosen": 1.5177125930786133, "log_odds_ratio": -0.5213903784751892, "logits/chosen": 0.6523463726043701, "logits/rejected": 0.6192033290863037, "logps/chosen": -1.8145666122436523, "logps/rejected": -3.2757058143615723, "loss": 0.7946, "nll_loss": 0.7424827814102173, "rewards/accuracies": 0.625, "rewards/chosen": -0.18145667016506195, "rewards/margins": 0.14611391723155975, "rewards/rejected": -0.3275705873966217, "step": 1561 }, { "epoch": 4.276522929500342, "grad_norm": 2.9590253829956055, "learning_rate": 7.860273972602739e-07, "log_odds_chosen": 1.115342378616333, "log_odds_ratio": -0.4812779426574707, "logits/chosen": 0.5202484130859375, "logits/rejected": 0.521577000617981, "logps/chosen": -2.1891560554504395, "logps/rejected": -3.2270843982696533, "loss": 0.9052, "nll_loss": 0.8570639491081238, "rewards/accuracies": 0.875, "rewards/chosen": -0.21891559660434723, "rewards/margins": 0.10379284620285034, "rewards/rejected": -0.3227084279060364, "step": 1562 }, { "epoch": 4.2792607802874745, "grad_norm": 3.1051907539367676, "learning_rate": 7.858904109589041e-07, "log_odds_chosen": 0.927697479724884, "log_odds_ratio": -0.5714734196662903, "logits/chosen": 0.8870329856872559, "logits/rejected": 0.9397257566452026, "logps/chosen": -2.4612531661987305, "logps/rejected": -3.2967052459716797, "loss": 0.7514, "nll_loss": 0.6942125558853149, "rewards/accuracies": 0.75, "rewards/chosen": -0.24612532556056976, "rewards/margins": 0.08354523032903671, "rewards/rejected": -0.3296705484390259, "step": 1563 }, { "epoch": 4.2819986310746065, "grad_norm": 2.4176528453826904, "learning_rate": 7.857534246575343e-07, "log_odds_chosen": 1.085000991821289, "log_odds_ratio": -0.36843276023864746, "logits/chosen": 0.9330586194992065, "logits/rejected": 0.9262542724609375, "logps/chosen": -1.9183584451675415, "logps/rejected": -2.8631980419158936, "loss": 0.7599, "nll_loss": 0.7230910658836365, "rewards/accuracies": 0.875, "rewards/chosen": -0.19183585047721863, "rewards/margins": 0.09448395669460297, "rewards/rejected": -0.2863197922706604, "step": 1564 }, { "epoch": 4.284736481861739, "grad_norm": 2.1625630855560303, "learning_rate": 7.856164383561643e-07, "log_odds_chosen": 1.6198986768722534, "log_odds_ratio": -0.39499497413635254, "logits/chosen": 0.8772061467170715, "logits/rejected": 0.8370180130004883, "logps/chosen": -2.0925211906433105, "logps/rejected": -3.6417288780212402, "loss": 0.8446, "nll_loss": 0.8051009774208069, "rewards/accuracies": 0.75, "rewards/chosen": -0.20925213396549225, "rewards/margins": 0.154920756816864, "rewards/rejected": -0.36417287588119507, "step": 1565 }, { "epoch": 4.287474332648871, "grad_norm": 2.6102046966552734, "learning_rate": 7.854794520547945e-07, "log_odds_chosen": 1.624261498451233, "log_odds_ratio": -0.3662484288215637, "logits/chosen": 0.65754234790802, "logits/rejected": 0.6491640210151672, "logps/chosen": -2.070256233215332, "logps/rejected": -3.531937837600708, "loss": 0.8941, "nll_loss": 0.8574468493461609, "rewards/accuracies": 0.875, "rewards/chosen": -0.20702563226222992, "rewards/margins": 0.14616815745830536, "rewards/rejected": -0.3531937897205353, "step": 1566 }, { "epoch": 4.290212183436003, "grad_norm": 2.4044981002807617, "learning_rate": 7.853424657534247e-07, "log_odds_chosen": 1.304184913635254, "log_odds_ratio": -0.3099765479564667, "logits/chosen": 0.9723253846168518, "logits/rejected": 0.9839016795158386, "logps/chosen": -2.009690284729004, "logps/rejected": -3.187654495239258, "loss": 0.7443, "nll_loss": 0.7133042216300964, "rewards/accuracies": 1.0, "rewards/chosen": -0.20096902549266815, "rewards/margins": 0.11779646575450897, "rewards/rejected": -0.31876546144485474, "step": 1567 }, { "epoch": 4.292950034223135, "grad_norm": 2.512235164642334, "learning_rate": 7.852054794520547e-07, "log_odds_chosen": 1.3797979354858398, "log_odds_ratio": -0.280640572309494, "logits/chosen": 0.7417433261871338, "logits/rejected": 0.7011415362358093, "logps/chosen": -1.7059924602508545, "logps/rejected": -2.8941121101379395, "loss": 0.82, "nll_loss": 0.7919248342514038, "rewards/accuracies": 1.0, "rewards/chosen": -0.17059925198554993, "rewards/margins": 0.11881197243928909, "rewards/rejected": -0.2894112169742584, "step": 1568 }, { "epoch": 4.295687885010267, "grad_norm": 2.3496322631835938, "learning_rate": 7.850684931506849e-07, "log_odds_chosen": 1.635506510734558, "log_odds_ratio": -0.3284958600997925, "logits/chosen": 0.7984398603439331, "logits/rejected": 0.7820972204208374, "logps/chosen": -1.679158329963684, "logps/rejected": -3.181027889251709, "loss": 0.7992, "nll_loss": 0.766343355178833, "rewards/accuracies": 0.875, "rewards/chosen": -0.16791585087776184, "rewards/margins": 0.1501869261264801, "rewards/rejected": -0.31810277700424194, "step": 1569 }, { "epoch": 4.298425735797399, "grad_norm": 2.458439826965332, "learning_rate": 7.849315068493151e-07, "log_odds_chosen": 0.7694989442825317, "log_odds_ratio": -0.5383228659629822, "logits/chosen": 0.557065486907959, "logits/rejected": 0.517241358757019, "logps/chosen": -1.9764100313186646, "logps/rejected": -2.7187740802764893, "loss": 0.8561, "nll_loss": 0.8023058176040649, "rewards/accuracies": 0.625, "rewards/chosen": -0.1976410150527954, "rewards/margins": 0.07423640787601471, "rewards/rejected": -0.2718774080276489, "step": 1570 }, { "epoch": 4.301163586584531, "grad_norm": 2.462351083755493, "learning_rate": 7.847945205479451e-07, "log_odds_chosen": 1.2833478450775146, "log_odds_ratio": -0.3882143795490265, "logits/chosen": 0.7483795881271362, "logits/rejected": 0.7340032458305359, "logps/chosen": -2.4905123710632324, "logps/rejected": -3.681583881378174, "loss": 0.8197, "nll_loss": 0.7809101343154907, "rewards/accuracies": 0.75, "rewards/chosen": -0.2490512579679489, "rewards/margins": 0.11910714209079742, "rewards/rejected": -0.3681584298610687, "step": 1571 }, { "epoch": 4.303901437371663, "grad_norm": 3.9888060092926025, "learning_rate": 7.846575342465753e-07, "log_odds_chosen": 0.9377163052558899, "log_odds_ratio": -0.7384741306304932, "logits/chosen": 0.6769992709159851, "logits/rejected": 0.6296377778053284, "logps/chosen": -2.979897975921631, "logps/rejected": -3.851525068283081, "loss": 0.9109, "nll_loss": 0.8370521664619446, "rewards/accuracies": 0.75, "rewards/chosen": -0.29798978567123413, "rewards/margins": 0.08716274797916412, "rewards/rejected": -0.38515251874923706, "step": 1572 }, { "epoch": 4.306639288158795, "grad_norm": 2.3037288188934326, "learning_rate": 7.845205479452054e-07, "log_odds_chosen": 1.0239205360412598, "log_odds_ratio": -0.4028649628162384, "logits/chosen": 0.8056902289390564, "logits/rejected": 0.7670125365257263, "logps/chosen": -1.5266790390014648, "logps/rejected": -2.4128386974334717, "loss": 0.8477, "nll_loss": 0.8073838353157043, "rewards/accuracies": 0.875, "rewards/chosen": -0.15266790986061096, "rewards/margins": 0.08861594647169113, "rewards/rejected": -0.2412838488817215, "step": 1573 }, { "epoch": 4.309377138945927, "grad_norm": 3.320512533187866, "learning_rate": 7.843835616438356e-07, "log_odds_chosen": 1.2968854904174805, "log_odds_ratio": -0.4077887535095215, "logits/chosen": 0.7275926470756531, "logits/rejected": 0.6452561616897583, "logps/chosen": -2.2618675231933594, "logps/rejected": -3.46112060546875, "loss": 0.7986, "nll_loss": 0.757858157157898, "rewards/accuracies": 0.75, "rewards/chosen": -0.22618675231933594, "rewards/margins": 0.11992530524730682, "rewards/rejected": -0.34611204266548157, "step": 1574 }, { "epoch": 4.312114989733059, "grad_norm": 2.223102331161499, "learning_rate": 7.842465753424657e-07, "log_odds_chosen": 2.318507671356201, "log_odds_ratio": -0.20767942070960999, "logits/chosen": 0.6499142050743103, "logits/rejected": 0.6421157121658325, "logps/chosen": -1.684039831161499, "logps/rejected": -3.827798366546631, "loss": 0.8289, "nll_loss": 0.8081227540969849, "rewards/accuracies": 1.0, "rewards/chosen": -0.1684039980173111, "rewards/margins": 0.21437586843967438, "rewards/rejected": -0.3827798664569855, "step": 1575 }, { "epoch": 4.314852840520191, "grad_norm": 2.19295597076416, "learning_rate": 7.841095890410958e-07, "log_odds_chosen": 2.0948173999786377, "log_odds_ratio": -0.22559814155101776, "logits/chosen": 0.883212149143219, "logits/rejected": 0.8682397603988647, "logps/chosen": -1.7445424795150757, "logps/rejected": -3.668217658996582, "loss": 0.7042, "nll_loss": 0.6816753149032593, "rewards/accuracies": 1.0, "rewards/chosen": -0.17445425689220428, "rewards/margins": 0.19236749410629272, "rewards/rejected": -0.3668217658996582, "step": 1576 }, { "epoch": 4.317590691307323, "grad_norm": 2.19883394241333, "learning_rate": 7.83972602739726e-07, "log_odds_chosen": 1.3693543672561646, "log_odds_ratio": -0.4247821569442749, "logits/chosen": 0.5633552670478821, "logits/rejected": 0.5336883664131165, "logps/chosen": -1.6548471450805664, "logps/rejected": -2.8867616653442383, "loss": 0.8137, "nll_loss": 0.7712589502334595, "rewards/accuracies": 0.875, "rewards/chosen": -0.1654846966266632, "rewards/margins": 0.1231914758682251, "rewards/rejected": -0.2886761724948883, "step": 1577 }, { "epoch": 4.3203285420944555, "grad_norm": 5.673452377319336, "learning_rate": 7.838356164383562e-07, "log_odds_chosen": 0.5519786477088928, "log_odds_ratio": -0.6120710968971252, "logits/chosen": 0.8909101486206055, "logits/rejected": 0.8736351132392883, "logps/chosen": -2.660952091217041, "logps/rejected": -3.1109299659729004, "loss": 0.8254, "nll_loss": 0.7642387747764587, "rewards/accuracies": 0.875, "rewards/chosen": -0.26609522104263306, "rewards/margins": 0.04499780759215355, "rewards/rejected": -0.3110930323600769, "step": 1578 }, { "epoch": 4.323066392881588, "grad_norm": 2.806645154953003, "learning_rate": 7.836986301369862e-07, "log_odds_chosen": 1.4957079887390137, "log_odds_ratio": -0.46821585297584534, "logits/chosen": 0.8099384307861328, "logits/rejected": 0.8205816745758057, "logps/chosen": -2.124957799911499, "logps/rejected": -3.514566421508789, "loss": 0.865, "nll_loss": 0.8181553483009338, "rewards/accuracies": 0.75, "rewards/chosen": -0.21249578893184662, "rewards/margins": 0.1389608532190323, "rewards/rejected": -0.3514566421508789, "step": 1579 }, { "epoch": 4.3258042436687205, "grad_norm": 2.9201998710632324, "learning_rate": 7.835616438356164e-07, "log_odds_chosen": 0.5675068497657776, "log_odds_ratio": -0.567003071308136, "logits/chosen": 0.9551794528961182, "logits/rejected": 0.9468681812286377, "logps/chosen": -2.4427971839904785, "logps/rejected": -2.977482795715332, "loss": 0.7644, "nll_loss": 0.7076911926269531, "rewards/accuracies": 0.75, "rewards/chosen": -0.24427972733974457, "rewards/margins": 0.05346855893731117, "rewards/rejected": -0.29774829745292664, "step": 1580 }, { "epoch": 4.328542094455852, "grad_norm": 3.4954769611358643, "learning_rate": 7.834246575342466e-07, "log_odds_chosen": 1.8392319679260254, "log_odds_ratio": -0.28079918026924133, "logits/chosen": 0.6682740449905396, "logits/rejected": 0.6309924721717834, "logps/chosen": -2.4484829902648926, "logps/rejected": -4.182369709014893, "loss": 0.8146, "nll_loss": 0.7864915132522583, "rewards/accuracies": 1.0, "rewards/chosen": -0.24484828114509583, "rewards/margins": 0.17338868975639343, "rewards/rejected": -0.41823700070381165, "step": 1581 }, { "epoch": 4.331279945242985, "grad_norm": 2.590496301651001, "learning_rate": 7.832876712328766e-07, "log_odds_chosen": 2.3744091987609863, "log_odds_ratio": -0.36561140418052673, "logits/chosen": 1.0127540826797485, "logits/rejected": 1.0425496101379395, "logps/chosen": -2.1649246215820312, "logps/rejected": -4.45633602142334, "loss": 0.7173, "nll_loss": 0.6807037591934204, "rewards/accuracies": 0.75, "rewards/chosen": -0.21649247407913208, "rewards/margins": 0.22914114594459534, "rewards/rejected": -0.44563359022140503, "step": 1582 }, { "epoch": 4.334017796030117, "grad_norm": 2.3718419075012207, "learning_rate": 7.831506849315068e-07, "log_odds_chosen": 1.2887924909591675, "log_odds_ratio": -0.3713430166244507, "logits/chosen": 1.0650064945220947, "logits/rejected": 1.0545837879180908, "logps/chosen": -2.2742645740509033, "logps/rejected": -3.4990439414978027, "loss": 0.7999, "nll_loss": 0.7627919912338257, "rewards/accuracies": 0.875, "rewards/chosen": -0.2274264693260193, "rewards/margins": 0.1224779263138771, "rewards/rejected": -0.3499044179916382, "step": 1583 }, { "epoch": 4.336755646817249, "grad_norm": 2.6611287593841553, "learning_rate": 7.83013698630137e-07, "log_odds_chosen": 1.5078469514846802, "log_odds_ratio": -0.2682788670063019, "logits/chosen": 1.0915967226028442, "logits/rejected": 1.0919383764266968, "logps/chosen": -2.47404146194458, "logps/rejected": -3.880397319793701, "loss": 0.7165, "nll_loss": 0.689703643321991, "rewards/accuracies": 1.0, "rewards/chosen": -0.24740415811538696, "rewards/margins": 0.14063559472560883, "rewards/rejected": -0.388039767742157, "step": 1584 }, { "epoch": 4.339493497604381, "grad_norm": 2.1893579959869385, "learning_rate": 7.82876712328767e-07, "log_odds_chosen": 1.6308032274246216, "log_odds_ratio": -0.3826856315135956, "logits/chosen": 0.8927059173583984, "logits/rejected": 0.8969188332557678, "logps/chosen": -2.279294490814209, "logps/rejected": -3.855300188064575, "loss": 0.7897, "nll_loss": 0.7514327764511108, "rewards/accuracies": 0.75, "rewards/chosen": -0.22792945802211761, "rewards/margins": 0.15760058164596558, "rewards/rejected": -0.3855300545692444, "step": 1585 }, { "epoch": 4.342231348391513, "grad_norm": 2.3530046939849854, "learning_rate": 7.827397260273972e-07, "log_odds_chosen": 1.412798285484314, "log_odds_ratio": -0.4695417881011963, "logits/chosen": 0.6212947368621826, "logits/rejected": 0.5027995109558105, "logps/chosen": -2.164857864379883, "logps/rejected": -3.553699016571045, "loss": 0.8804, "nll_loss": 0.8334514498710632, "rewards/accuracies": 0.75, "rewards/chosen": -0.21648581326007843, "rewards/margins": 0.13888411223888397, "rewards/rejected": -0.3553699254989624, "step": 1586 }, { "epoch": 4.344969199178645, "grad_norm": 2.4947824478149414, "learning_rate": 7.826027397260274e-07, "log_odds_chosen": 1.5183124542236328, "log_odds_ratio": -0.38636112213134766, "logits/chosen": 0.7920172214508057, "logits/rejected": 0.765653669834137, "logps/chosen": -1.521186113357544, "logps/rejected": -2.9159066677093506, "loss": 0.8472, "nll_loss": 0.8086035847663879, "rewards/accuracies": 0.75, "rewards/chosen": -0.15211862325668335, "rewards/margins": 0.13947205245494843, "rewards/rejected": -0.29159069061279297, "step": 1587 }, { "epoch": 4.347707049965777, "grad_norm": 2.47810435295105, "learning_rate": 7.824657534246575e-07, "log_odds_chosen": 2.246955156326294, "log_odds_ratio": -0.43449023365974426, "logits/chosen": 0.8185594081878662, "logits/rejected": 0.7786784768104553, "logps/chosen": -2.0444045066833496, "logps/rejected": -4.109062671661377, "loss": 0.8641, "nll_loss": 0.8206263780593872, "rewards/accuracies": 0.625, "rewards/chosen": -0.20444045960903168, "rewards/margins": 0.20646579563617706, "rewards/rejected": -0.41090625524520874, "step": 1588 }, { "epoch": 4.350444900752909, "grad_norm": 2.658635139465332, "learning_rate": 7.823287671232876e-07, "log_odds_chosen": 2.188528537750244, "log_odds_ratio": -0.30502188205718994, "logits/chosen": 1.0448436737060547, "logits/rejected": 0.9827399849891663, "logps/chosen": -2.2190799713134766, "logps/rejected": -4.304042339324951, "loss": 0.8404, "nll_loss": 0.8099005222320557, "rewards/accuracies": 0.875, "rewards/chosen": -0.22190800309181213, "rewards/margins": 0.20849622786045074, "rewards/rejected": -0.4304042458534241, "step": 1589 }, { "epoch": 4.353182751540041, "grad_norm": 2.57707142829895, "learning_rate": 7.821917808219177e-07, "log_odds_chosen": 1.5277793407440186, "log_odds_ratio": -0.2228127121925354, "logits/chosen": 0.9057455062866211, "logits/rejected": 0.9432150721549988, "logps/chosen": -2.454089641571045, "logps/rejected": -3.902345895767212, "loss": 0.7653, "nll_loss": 0.7430073022842407, "rewards/accuracies": 1.0, "rewards/chosen": -0.24540895223617554, "rewards/margins": 0.14482560753822327, "rewards/rejected": -0.3902345895767212, "step": 1590 }, { "epoch": 4.355920602327173, "grad_norm": 3.5348241329193115, "learning_rate": 7.820547945205479e-07, "log_odds_chosen": 0.9467031955718994, "log_odds_ratio": -0.5217294096946716, "logits/chosen": 0.5352520942687988, "logits/rejected": 0.5150243043899536, "logps/chosen": -2.1125266551971436, "logps/rejected": -2.962940216064453, "loss": 0.8294, "nll_loss": 0.7772193551063538, "rewards/accuracies": 0.875, "rewards/chosen": -0.21125268936157227, "rewards/margins": 0.08504132926464081, "rewards/rejected": -0.2962940037250519, "step": 1591 }, { "epoch": 4.358658453114305, "grad_norm": 1.9757702350616455, "learning_rate": 7.81917808219178e-07, "log_odds_chosen": 2.622495174407959, "log_odds_ratio": -0.1245434433221817, "logits/chosen": 1.07091224193573, "logits/rejected": 1.0543609857559204, "logps/chosen": -1.7714612483978271, "logps/rejected": -4.197811126708984, "loss": 0.7116, "nll_loss": 0.6991907358169556, "rewards/accuracies": 1.0, "rewards/chosen": -0.17714613676071167, "rewards/margins": 0.24263498187065125, "rewards/rejected": -0.4197811186313629, "step": 1592 }, { "epoch": 4.361396303901437, "grad_norm": 2.8097035884857178, "learning_rate": 7.817808219178081e-07, "log_odds_chosen": 1.3506971597671509, "log_odds_ratio": -0.26968926191329956, "logits/chosen": 0.7627755403518677, "logits/rejected": 0.7647632360458374, "logps/chosen": -1.739560604095459, "logps/rejected": -2.8152294158935547, "loss": 0.7768, "nll_loss": 0.7498021721839905, "rewards/accuracies": 1.0, "rewards/chosen": -0.17395606637001038, "rewards/margins": 0.10756684839725494, "rewards/rejected": -0.2815229296684265, "step": 1593 }, { "epoch": 4.3641341546885695, "grad_norm": 2.358975410461426, "learning_rate": 7.816438356164383e-07, "log_odds_chosen": 1.4941558837890625, "log_odds_ratio": -0.3195948600769043, "logits/chosen": 0.8742737770080566, "logits/rejected": 0.8595657348632812, "logps/chosen": -1.962062954902649, "logps/rejected": -3.3308663368225098, "loss": 0.8108, "nll_loss": 0.77886563539505, "rewards/accuracies": 1.0, "rewards/chosen": -0.19620630145072937, "rewards/margins": 0.1368803083896637, "rewards/rejected": -0.3330865800380707, "step": 1594 }, { "epoch": 4.3668720054757015, "grad_norm": 2.9412739276885986, "learning_rate": 7.815068493150685e-07, "log_odds_chosen": 2.2264633178710938, "log_odds_ratio": -0.22625303268432617, "logits/chosen": 0.8909151554107666, "logits/rejected": 0.9040835499763489, "logps/chosen": -2.4856762886047363, "logps/rejected": -4.604935646057129, "loss": 0.8029, "nll_loss": 0.7802355885505676, "rewards/accuracies": 0.875, "rewards/chosen": -0.2485676258802414, "rewards/margins": 0.2119259536266327, "rewards/rejected": -0.4604935646057129, "step": 1595 }, { "epoch": 4.369609856262834, "grad_norm": 3.1920390129089355, "learning_rate": 7.813698630136985e-07, "log_odds_chosen": 2.7037150859832764, "log_odds_ratio": -0.17995359003543854, "logits/chosen": 0.8715530037879944, "logits/rejected": 0.7622559666633606, "logps/chosen": -2.407203435897827, "logps/rejected": -4.9901299476623535, "loss": 0.947, "nll_loss": 0.9289901256561279, "rewards/accuracies": 1.0, "rewards/chosen": -0.24072034657001495, "rewards/margins": 0.25829264521598816, "rewards/rejected": -0.4990130364894867, "step": 1596 }, { "epoch": 4.372347707049966, "grad_norm": 2.957002878189087, "learning_rate": 7.812328767123287e-07, "log_odds_chosen": 1.4300282001495361, "log_odds_ratio": -0.3226171135902405, "logits/chosen": 1.010745882987976, "logits/rejected": 1.0277453660964966, "logps/chosen": -2.1910765171051025, "logps/rejected": -3.4912102222442627, "loss": 0.7258, "nll_loss": 0.6934930682182312, "rewards/accuracies": 1.0, "rewards/chosen": -0.21910765767097473, "rewards/margins": 0.1300133466720581, "rewards/rejected": -0.3491210341453552, "step": 1597 }, { "epoch": 4.375085557837098, "grad_norm": 2.2024331092834473, "learning_rate": 7.810958904109589e-07, "log_odds_chosen": 2.58457088470459, "log_odds_ratio": -0.14848476648330688, "logits/chosen": 1.1281968355178833, "logits/rejected": 1.1624501943588257, "logps/chosen": -2.193878650665283, "logps/rejected": -4.670312881469727, "loss": 0.6857, "nll_loss": 0.670869767665863, "rewards/accuracies": 1.0, "rewards/chosen": -0.21938788890838623, "rewards/margins": 0.24764344096183777, "rewards/rejected": -0.4670313000679016, "step": 1598 }, { "epoch": 4.37782340862423, "grad_norm": 2.6275277137756348, "learning_rate": 7.809589041095889e-07, "log_odds_chosen": 1.5454974174499512, "log_odds_ratio": -0.39809733629226685, "logits/chosen": 1.0103285312652588, "logits/rejected": 1.0629632472991943, "logps/chosen": -2.8498282432556152, "logps/rejected": -4.397830963134766, "loss": 0.8264, "nll_loss": 0.7866395115852356, "rewards/accuracies": 0.875, "rewards/chosen": -0.2849828004837036, "rewards/margins": 0.15480023622512817, "rewards/rejected": -0.4397830665111542, "step": 1599 }, { "epoch": 4.380561259411362, "grad_norm": 3.3693222999572754, "learning_rate": 7.808219178082191e-07, "log_odds_chosen": 1.6257966756820679, "log_odds_ratio": -0.5277543067932129, "logits/chosen": 1.0144381523132324, "logits/rejected": 0.9722557067871094, "logps/chosen": -2.517592668533325, "logps/rejected": -4.0172119140625, "loss": 0.9037, "nll_loss": 0.8508979082107544, "rewards/accuracies": 0.875, "rewards/chosen": -0.25175929069519043, "rewards/margins": 0.1499619334936142, "rewards/rejected": -0.40172117948532104, "step": 1600 }, { "epoch": 4.383299110198494, "grad_norm": 2.792247772216797, "learning_rate": 7.806849315068493e-07, "log_odds_chosen": 0.619387149810791, "log_odds_ratio": -0.5385338068008423, "logits/chosen": 1.0062832832336426, "logits/rejected": 0.9638369083404541, "logps/chosen": -2.100605010986328, "logps/rejected": -2.688023805618286, "loss": 0.824, "nll_loss": 0.7701600790023804, "rewards/accuracies": 0.625, "rewards/chosen": -0.21006053686141968, "rewards/margins": 0.05874187499284744, "rewards/rejected": -0.2688024044036865, "step": 1601 }, { "epoch": 4.386036960985626, "grad_norm": 2.534024953842163, "learning_rate": 7.805479452054794e-07, "log_odds_chosen": 1.757890224456787, "log_odds_ratio": -0.23469941318035126, "logits/chosen": 0.7397351264953613, "logits/rejected": 0.6694161891937256, "logps/chosen": -1.8630797863006592, "logps/rejected": -3.4663100242614746, "loss": 0.7599, "nll_loss": 0.7364108562469482, "rewards/accuracies": 1.0, "rewards/chosen": -0.18630798161029816, "rewards/margins": 0.16032303869724274, "rewards/rejected": -0.3466309905052185, "step": 1602 }, { "epoch": 4.388774811772758, "grad_norm": 2.674708843231201, "learning_rate": 7.804109589041095e-07, "log_odds_chosen": 2.1232194900512695, "log_odds_ratio": -0.24731123447418213, "logits/chosen": 1.113637924194336, "logits/rejected": 1.1310104131698608, "logps/chosen": -2.404935359954834, "logps/rejected": -4.446493148803711, "loss": 0.7555, "nll_loss": 0.7307210564613342, "rewards/accuracies": 1.0, "rewards/chosen": -0.2404935359954834, "rewards/margins": 0.20415572822093964, "rewards/rejected": -0.44464927911758423, "step": 1603 }, { "epoch": 4.39151266255989, "grad_norm": 2.4690346717834473, "learning_rate": 7.802739726027396e-07, "log_odds_chosen": 1.7360798120498657, "log_odds_ratio": -0.281585693359375, "logits/chosen": 0.7318875789642334, "logits/rejected": 0.7080594897270203, "logps/chosen": -1.6318354606628418, "logps/rejected": -3.18318247795105, "loss": 0.827, "nll_loss": 0.7988897562026978, "rewards/accuracies": 0.875, "rewards/chosen": -0.1631835550069809, "rewards/margins": 0.1551346778869629, "rewards/rejected": -0.318318247795105, "step": 1604 }, { "epoch": 4.394250513347022, "grad_norm": 2.1013903617858887, "learning_rate": 7.801369863013698e-07, "log_odds_chosen": 1.9226694107055664, "log_odds_ratio": -0.28497904539108276, "logits/chosen": 0.7620193958282471, "logits/rejected": 0.7761604189872742, "logps/chosen": -2.030226707458496, "logps/rejected": -3.810195207595825, "loss": 0.7447, "nll_loss": 0.7162046432495117, "rewards/accuracies": 1.0, "rewards/chosen": -0.20302265882492065, "rewards/margins": 0.17799688875675201, "rewards/rejected": -0.3810195326805115, "step": 1605 }, { "epoch": 4.396988364134154, "grad_norm": 3.9028866291046143, "learning_rate": 7.799999999999999e-07, "log_odds_chosen": 0.7511436939239502, "log_odds_ratio": -0.5042373538017273, "logits/chosen": 0.8992100954055786, "logits/rejected": 0.884596586227417, "logps/chosen": -2.7124218940734863, "logps/rejected": -3.367288827896118, "loss": 0.7923, "nll_loss": 0.7419063448905945, "rewards/accuracies": 0.625, "rewards/chosen": -0.2712422013282776, "rewards/margins": 0.06548666954040527, "rewards/rejected": -0.33672887086868286, "step": 1606 }, { "epoch": 4.399726214921287, "grad_norm": 2.3266372680664062, "learning_rate": 7.7986301369863e-07, "log_odds_chosen": 2.7657387256622314, "log_odds_ratio": -0.19706249237060547, "logits/chosen": 0.6705130934715271, "logits/rejected": 0.6315199732780457, "logps/chosen": -2.433264970779419, "logps/rejected": -5.081361293792725, "loss": 0.8669, "nll_loss": 0.84716796875, "rewards/accuracies": 1.0, "rewards/chosen": -0.24332651495933533, "rewards/margins": 0.26480963826179504, "rewards/rejected": -0.5081361532211304, "step": 1607 }, { "epoch": 4.402464065708419, "grad_norm": 3.833509683609009, "learning_rate": 7.797260273972602e-07, "log_odds_chosen": 1.3611928224563599, "log_odds_ratio": -0.5778595209121704, "logits/chosen": 0.8318132162094116, "logits/rejected": 0.8418258428573608, "logps/chosen": -2.6815271377563477, "logps/rejected": -3.966320037841797, "loss": 0.8989, "nll_loss": 0.8411568999290466, "rewards/accuracies": 0.75, "rewards/chosen": -0.26815271377563477, "rewards/margins": 0.12847930192947388, "rewards/rejected": -0.39663198590278625, "step": 1608 }, { "epoch": 4.405201916495551, "grad_norm": 2.8628599643707275, "learning_rate": 7.795890410958905e-07, "log_odds_chosen": 1.6183381080627441, "log_odds_ratio": -0.4018746614456177, "logits/chosen": 0.6046041250228882, "logits/rejected": 0.48691415786743164, "logps/chosen": -1.8968255519866943, "logps/rejected": -3.448559284210205, "loss": 0.9158, "nll_loss": 0.8756554126739502, "rewards/accuracies": 0.75, "rewards/chosen": -0.18968254327774048, "rewards/margins": 0.1551733911037445, "rewards/rejected": -0.3448559641838074, "step": 1609 }, { "epoch": 4.407939767282683, "grad_norm": 2.2965078353881836, "learning_rate": 7.794520547945204e-07, "log_odds_chosen": 2.764831066131592, "log_odds_ratio": -0.11904007941484451, "logits/chosen": 1.1017500162124634, "logits/rejected": 1.128510594367981, "logps/chosen": -1.7316176891326904, "logps/rejected": -4.244845390319824, "loss": 0.7127, "nll_loss": 0.7007527947425842, "rewards/accuracies": 1.0, "rewards/chosen": -0.17316177487373352, "rewards/margins": 0.25132277607917786, "rewards/rejected": -0.4244845509529114, "step": 1610 }, { "epoch": 4.4106776180698155, "grad_norm": 2.4766616821289062, "learning_rate": 7.793150684931507e-07, "log_odds_chosen": 1.014592170715332, "log_odds_ratio": -0.3746272027492523, "logits/chosen": 0.9254416227340698, "logits/rejected": 0.907905101776123, "logps/chosen": -2.1124486923217773, "logps/rejected": -3.0489630699157715, "loss": 0.7814, "nll_loss": 0.7439013123512268, "rewards/accuracies": 1.0, "rewards/chosen": -0.2112448811531067, "rewards/margins": 0.0936514288187027, "rewards/rejected": -0.3048962950706482, "step": 1611 }, { "epoch": 4.413415468856948, "grad_norm": 7.509730815887451, "learning_rate": 7.791780821917809e-07, "log_odds_chosen": 3.0452401638031006, "log_odds_ratio": -0.6638375520706177, "logits/chosen": 0.9001789093017578, "logits/rejected": 0.8667099475860596, "logps/chosen": -2.3300671577453613, "logps/rejected": -5.093623161315918, "loss": 0.8463, "nll_loss": 0.779953122138977, "rewards/accuracies": 0.875, "rewards/chosen": -0.23300673067569733, "rewards/margins": 0.2763555943965912, "rewards/rejected": -0.5093623399734497, "step": 1612 }, { "epoch": 4.41615331964408, "grad_norm": 2.367220878601074, "learning_rate": 7.790410958904108e-07, "log_odds_chosen": 2.6318578720092773, "log_odds_ratio": -0.24378180503845215, "logits/chosen": 0.8655067682266235, "logits/rejected": 0.8666045069694519, "logps/chosen": -1.9179229736328125, "logps/rejected": -4.348989963531494, "loss": 0.7176, "nll_loss": 0.6932123899459839, "rewards/accuracies": 0.875, "rewards/chosen": -0.191792294383049, "rewards/margins": 0.24310670793056488, "rewards/rejected": -0.4348989725112915, "step": 1613 }, { "epoch": 4.418891170431212, "grad_norm": 2.749753475189209, "learning_rate": 7.789041095890411e-07, "log_odds_chosen": 1.873508095741272, "log_odds_ratio": -0.47549885511398315, "logits/chosen": 0.9766994714736938, "logits/rejected": 0.9735760688781738, "logps/chosen": -2.6418204307556152, "logps/rejected": -4.394586563110352, "loss": 0.9487, "nll_loss": 0.901195764541626, "rewards/accuracies": 0.875, "rewards/chosen": -0.26418203115463257, "rewards/margins": 0.17527660727500916, "rewards/rejected": -0.4394586682319641, "step": 1614 }, { "epoch": 4.421629021218344, "grad_norm": 2.542880058288574, "learning_rate": 7.787671232876713e-07, "log_odds_chosen": 1.7701005935668945, "log_odds_ratio": -0.2564181387424469, "logits/chosen": 0.5817480087280273, "logits/rejected": 0.5553026795387268, "logps/chosen": -1.8177692890167236, "logps/rejected": -3.4119739532470703, "loss": 0.8162, "nll_loss": 0.7905871868133545, "rewards/accuracies": 1.0, "rewards/chosen": -0.18177694082260132, "rewards/margins": 0.15942047536373138, "rewards/rejected": -0.3411974310874939, "step": 1615 }, { "epoch": 4.424366872005476, "grad_norm": 2.6062872409820557, "learning_rate": 7.786301369863014e-07, "log_odds_chosen": 0.825785756111145, "log_odds_ratio": -0.3961640000343323, "logits/chosen": 0.7947100400924683, "logits/rejected": 0.784866452217102, "logps/chosen": -1.871983289718628, "logps/rejected": -2.595938205718994, "loss": 0.8009, "nll_loss": 0.7613324522972107, "rewards/accuracies": 0.875, "rewards/chosen": -0.18719831109046936, "rewards/margins": 0.07239550352096558, "rewards/rejected": -0.2595938444137573, "step": 1616 }, { "epoch": 4.427104722792608, "grad_norm": 4.220090866088867, "learning_rate": 7.784931506849315e-07, "log_odds_chosen": 1.1875481605529785, "log_odds_ratio": -0.6655469536781311, "logits/chosen": 0.5630832314491272, "logits/rejected": 0.5890258550643921, "logps/chosen": -2.8234028816223145, "logps/rejected": -3.901451587677002, "loss": 0.9542, "nll_loss": 0.8876889944076538, "rewards/accuracies": 0.875, "rewards/chosen": -0.28234031796455383, "rewards/margins": 0.10780487954616547, "rewards/rejected": -0.3901451826095581, "step": 1617 }, { "epoch": 4.42984257357974, "grad_norm": 2.4101710319519043, "learning_rate": 7.783561643835616e-07, "log_odds_chosen": 2.2224812507629395, "log_odds_ratio": -0.2360159456729889, "logits/chosen": 1.0085787773132324, "logits/rejected": 1.0286777019500732, "logps/chosen": -1.9907844066619873, "logps/rejected": -4.106194496154785, "loss": 0.7182, "nll_loss": 0.69460529088974, "rewards/accuracies": 0.875, "rewards/chosen": -0.19907842576503754, "rewards/margins": 0.2115410566329956, "rewards/rejected": -0.41061949729919434, "step": 1618 }, { "epoch": 4.432580424366872, "grad_norm": 3.084773063659668, "learning_rate": 7.782191780821918e-07, "log_odds_chosen": 1.9797159433364868, "log_odds_ratio": -0.39104166626930237, "logits/chosen": 0.9353567361831665, "logits/rejected": 0.967875599861145, "logps/chosen": -2.7013955116271973, "logps/rejected": -4.608721733093262, "loss": 0.7941, "nll_loss": 0.7550303936004639, "rewards/accuracies": 0.875, "rewards/chosen": -0.27013957500457764, "rewards/margins": 0.19073261320590973, "rewards/rejected": -0.46087217330932617, "step": 1619 }, { "epoch": 4.435318275154004, "grad_norm": 4.097414016723633, "learning_rate": 7.780821917808219e-07, "log_odds_chosen": 0.2775012254714966, "log_odds_ratio": -0.721682071685791, "logits/chosen": 0.6304537057876587, "logits/rejected": 0.5600178241729736, "logps/chosen": -2.876992702484131, "logps/rejected": -3.090409517288208, "loss": 0.8694, "nll_loss": 0.7972505688667297, "rewards/accuracies": 0.625, "rewards/chosen": -0.28769928216934204, "rewards/margins": 0.02134166657924652, "rewards/rejected": -0.30904093384742737, "step": 1620 }, { "epoch": 4.438056125941136, "grad_norm": 4.325239181518555, "learning_rate": 7.77945205479452e-07, "log_odds_chosen": 1.0570805072784424, "log_odds_ratio": -0.39170360565185547, "logits/chosen": 0.7767358422279358, "logits/rejected": 0.7392828464508057, "logps/chosen": -2.510716438293457, "logps/rejected": -3.494239568710327, "loss": 0.8565, "nll_loss": 0.817337155342102, "rewards/accuracies": 0.875, "rewards/chosen": -0.25107163190841675, "rewards/margins": 0.0983523279428482, "rewards/rejected": -0.34942394495010376, "step": 1621 }, { "epoch": 4.440793976728268, "grad_norm": 2.3459134101867676, "learning_rate": 7.778082191780822e-07, "log_odds_chosen": 2.054594039916992, "log_odds_ratio": -0.35291504859924316, "logits/chosen": 0.7330218553543091, "logits/rejected": 0.8022485375404358, "logps/chosen": -2.2092297077178955, "logps/rejected": -4.151717185974121, "loss": 0.8528, "nll_loss": 0.8174738883972168, "rewards/accuracies": 0.875, "rewards/chosen": -0.22092297673225403, "rewards/margins": 0.19424879550933838, "rewards/rejected": -0.41517174243927, "step": 1622 }, { "epoch": 4.4435318275154, "grad_norm": 2.5575265884399414, "learning_rate": 7.776712328767124e-07, "log_odds_chosen": 0.9144078493118286, "log_odds_ratio": -0.41020187735557556, "logits/chosen": 0.7016605138778687, "logits/rejected": 0.5630669593811035, "logps/chosen": -1.5866807699203491, "logps/rejected": -2.3738744258880615, "loss": 0.8381, "nll_loss": 0.7970759868621826, "rewards/accuracies": 1.0, "rewards/chosen": -0.15866807103157043, "rewards/margins": 0.078719362616539, "rewards/rejected": -0.23738743364810944, "step": 1623 }, { "epoch": 4.446269678302532, "grad_norm": 3.137075901031494, "learning_rate": 7.775342465753424e-07, "log_odds_chosen": 1.1824986934661865, "log_odds_ratio": -0.42865830659866333, "logits/chosen": 0.8128169178962708, "logits/rejected": 0.7474877834320068, "logps/chosen": -1.8211454153060913, "logps/rejected": -2.824169635772705, "loss": 0.7775, "nll_loss": 0.7346543073654175, "rewards/accuracies": 0.875, "rewards/chosen": -0.18211454153060913, "rewards/margins": 0.10030244290828705, "rewards/rejected": -0.2824169993400574, "step": 1624 }, { "epoch": 4.4490075290896645, "grad_norm": 3.490316152572632, "learning_rate": 7.773972602739726e-07, "log_odds_chosen": 0.798773467540741, "log_odds_ratio": -0.6112072467803955, "logits/chosen": 0.7198523283004761, "logits/rejected": 0.6794655323028564, "logps/chosen": -2.7241458892822266, "logps/rejected": -3.4939727783203125, "loss": 0.8501, "nll_loss": 0.7890254855155945, "rewards/accuracies": 0.75, "rewards/chosen": -0.27241456508636475, "rewards/margins": 0.07698269188404083, "rewards/rejected": -0.3493972718715668, "step": 1625 }, { "epoch": 4.4517453798767965, "grad_norm": 2.8791041374206543, "learning_rate": 7.772602739726028e-07, "log_odds_chosen": 1.1279159784317017, "log_odds_ratio": -0.4407157599925995, "logits/chosen": 0.8457029461860657, "logits/rejected": 0.8399591445922852, "logps/chosen": -2.1415467262268066, "logps/rejected": -3.2278058528900146, "loss": 0.7755, "nll_loss": 0.7314695119857788, "rewards/accuracies": 0.75, "rewards/chosen": -0.2141546607017517, "rewards/margins": 0.10862593352794647, "rewards/rejected": -0.3227806091308594, "step": 1626 }, { "epoch": 4.454483230663929, "grad_norm": 3.0562102794647217, "learning_rate": 7.771232876712328e-07, "log_odds_chosen": 1.0484111309051514, "log_odds_ratio": -0.5320797562599182, "logits/chosen": 0.915770947933197, "logits/rejected": 0.9004281163215637, "logps/chosen": -1.9720243215560913, "logps/rejected": -2.816897392272949, "loss": 0.8858, "nll_loss": 0.8326185941696167, "rewards/accuracies": 0.75, "rewards/chosen": -0.1972024291753769, "rewards/margins": 0.08448730409145355, "rewards/rejected": -0.28168973326683044, "step": 1627 }, { "epoch": 4.457221081451061, "grad_norm": 2.2269599437713623, "learning_rate": 7.76986301369863e-07, "log_odds_chosen": 2.5732202529907227, "log_odds_ratio": -0.18682292103767395, "logits/chosen": 0.8493784070014954, "logits/rejected": 0.8616673350334167, "logps/chosen": -1.8826777935028076, "logps/rejected": -4.308990478515625, "loss": 0.7297, "nll_loss": 0.7109885811805725, "rewards/accuracies": 0.875, "rewards/chosen": -0.1882677674293518, "rewards/margins": 0.24263127148151398, "rewards/rejected": -0.430899053812027, "step": 1628 }, { "epoch": 4.459958932238193, "grad_norm": 2.1890528202056885, "learning_rate": 7.768493150684932e-07, "log_odds_chosen": 2.544837236404419, "log_odds_ratio": -0.3126703202724457, "logits/chosen": 0.8042869567871094, "logits/rejected": 0.7709352374076843, "logps/chosen": -2.173092842102051, "logps/rejected": -4.66392707824707, "loss": 0.8258, "nll_loss": 0.7945429086685181, "rewards/accuracies": 0.875, "rewards/chosen": -0.21730928122997284, "rewards/margins": 0.24908342957496643, "rewards/rejected": -0.4663926959037781, "step": 1629 }, { "epoch": 4.462696783025325, "grad_norm": 3.018300771713257, "learning_rate": 7.767123287671233e-07, "log_odds_chosen": 1.5066251754760742, "log_odds_ratio": -0.46329033374786377, "logits/chosen": 0.6386318802833557, "logits/rejected": 0.572381317615509, "logps/chosen": -2.1322646141052246, "logps/rejected": -3.5342679023742676, "loss": 0.9763, "nll_loss": 0.9300129413604736, "rewards/accuracies": 0.625, "rewards/chosen": -0.21322648227214813, "rewards/margins": 0.14020030200481415, "rewards/rejected": -0.3534267842769623, "step": 1630 }, { "epoch": 4.465434633812457, "grad_norm": 2.3900058269500732, "learning_rate": 7.765753424657534e-07, "log_odds_chosen": 1.2038120031356812, "log_odds_ratio": -0.361066073179245, "logits/chosen": 0.8843356370925903, "logits/rejected": 0.8573213815689087, "logps/chosen": -2.3874924182891846, "logps/rejected": -3.4965837001800537, "loss": 0.8118, "nll_loss": 0.7757297158241272, "rewards/accuracies": 0.875, "rewards/chosen": -0.23874926567077637, "rewards/margins": 0.110909104347229, "rewards/rejected": -0.34965837001800537, "step": 1631 }, { "epoch": 4.468172484599589, "grad_norm": 3.912621021270752, "learning_rate": 7.764383561643836e-07, "log_odds_chosen": 1.1731613874435425, "log_odds_ratio": -0.4548526704311371, "logits/chosen": 1.02207350730896, "logits/rejected": 1.0052332878112793, "logps/chosen": -2.6128146648406982, "logps/rejected": -3.6990208625793457, "loss": 0.8515, "nll_loss": 0.8060319423675537, "rewards/accuracies": 0.75, "rewards/chosen": -0.26128149032592773, "rewards/margins": 0.10862059891223907, "rewards/rejected": -0.3699020743370056, "step": 1632 }, { "epoch": 4.470910335386721, "grad_norm": 3.0691277980804443, "learning_rate": 7.763013698630137e-07, "log_odds_chosen": 1.84084951877594, "log_odds_ratio": -0.30825138092041016, "logits/chosen": 0.8276029825210571, "logits/rejected": 0.7740331888198853, "logps/chosen": -2.386523485183716, "logps/rejected": -4.129581451416016, "loss": 0.8482, "nll_loss": 0.8173743486404419, "rewards/accuracies": 1.0, "rewards/chosen": -0.23865236341953278, "rewards/margins": 0.1743057817220688, "rewards/rejected": -0.41295814514160156, "step": 1633 }, { "epoch": 4.473648186173854, "grad_norm": 2.2149436473846436, "learning_rate": 7.761643835616438e-07, "log_odds_chosen": 3.131523609161377, "log_odds_ratio": -0.2266322672367096, "logits/chosen": 0.9779013395309448, "logits/rejected": 0.9944605827331543, "logps/chosen": -1.9078567028045654, "logps/rejected": -4.901138782501221, "loss": 0.6599, "nll_loss": 0.6371954679489136, "rewards/accuracies": 0.875, "rewards/chosen": -0.19078567624092102, "rewards/margins": 0.29932817816734314, "rewards/rejected": -0.49011385440826416, "step": 1634 }, { "epoch": 4.476386036960986, "grad_norm": 3.072422742843628, "learning_rate": 7.760273972602739e-07, "log_odds_chosen": 1.5514719486236572, "log_odds_ratio": -0.34420621395111084, "logits/chosen": 0.4600715637207031, "logits/rejected": 0.5093622803688049, "logps/chosen": -1.902245044708252, "logps/rejected": -3.3250133991241455, "loss": 0.798, "nll_loss": 0.7635573148727417, "rewards/accuracies": 0.875, "rewards/chosen": -0.1902245283126831, "rewards/margins": 0.1422768235206604, "rewards/rejected": -0.3325013518333435, "step": 1635 }, { "epoch": 4.479123887748118, "grad_norm": 2.8228983879089355, "learning_rate": 7.758904109589041e-07, "log_odds_chosen": 1.467576026916504, "log_odds_ratio": -0.46733492612838745, "logits/chosen": 0.7766159176826477, "logits/rejected": 0.7672497034072876, "logps/chosen": -2.302670955657959, "logps/rejected": -3.63454532623291, "loss": 0.8278, "nll_loss": 0.7810519933700562, "rewards/accuracies": 0.75, "rewards/chosen": -0.23026710748672485, "rewards/margins": 0.1331874430179596, "rewards/rejected": -0.36345458030700684, "step": 1636 }, { "epoch": 4.48186173853525, "grad_norm": 2.8219821453094482, "learning_rate": 7.757534246575343e-07, "log_odds_chosen": 1.0903840065002441, "log_odds_ratio": -0.4046710133552551, "logits/chosen": 0.5753660202026367, "logits/rejected": 0.4656268060207367, "logps/chosen": -1.6398175954818726, "logps/rejected": -2.6002354621887207, "loss": 0.8653, "nll_loss": 0.8248239159584045, "rewards/accuracies": 0.875, "rewards/chosen": -0.16398176550865173, "rewards/margins": 0.09604179859161377, "rewards/rejected": -0.2600235641002655, "step": 1637 }, { "epoch": 4.484599589322382, "grad_norm": 2.8804502487182617, "learning_rate": 7.756164383561643e-07, "log_odds_chosen": 1.6384223699569702, "log_odds_ratio": -0.47635915875434875, "logits/chosen": 1.0342113971710205, "logits/rejected": 1.0701687335968018, "logps/chosen": -2.626495599746704, "logps/rejected": -4.185967922210693, "loss": 0.8453, "nll_loss": 0.7976785898208618, "rewards/accuracies": 0.875, "rewards/chosen": -0.2626495361328125, "rewards/margins": 0.1559472233057022, "rewards/rejected": -0.4185967445373535, "step": 1638 }, { "epoch": 4.487337440109514, "grad_norm": 2.29056453704834, "learning_rate": 7.754794520547945e-07, "log_odds_chosen": 2.3382201194763184, "log_odds_ratio": -0.3151800036430359, "logits/chosen": 0.7924215793609619, "logits/rejected": 0.7724250555038452, "logps/chosen": -2.062333345413208, "logps/rejected": -4.294757843017578, "loss": 0.8008, "nll_loss": 0.769284188747406, "rewards/accuracies": 0.875, "rewards/chosen": -0.20623333752155304, "rewards/margins": 0.22324246168136597, "rewards/rejected": -0.4294757843017578, "step": 1639 }, { "epoch": 4.490075290896646, "grad_norm": 3.202563762664795, "learning_rate": 7.753424657534247e-07, "log_odds_chosen": 1.0210254192352295, "log_odds_ratio": -0.6193140745162964, "logits/chosen": 0.7773930430412292, "logits/rejected": 0.7941442728042603, "logps/chosen": -2.0142159461975098, "logps/rejected": -2.932076930999756, "loss": 0.8853, "nll_loss": 0.8233461380004883, "rewards/accuracies": 0.625, "rewards/chosen": -0.2014216035604477, "rewards/margins": 0.09178606420755386, "rewards/rejected": -0.29320767521858215, "step": 1640 }, { "epoch": 4.492813141683778, "grad_norm": 2.084213972091675, "learning_rate": 7.752054794520547e-07, "log_odds_chosen": 1.3763251304626465, "log_odds_ratio": -0.29107382893562317, "logits/chosen": 0.5572899580001831, "logits/rejected": 0.4770444631576538, "logps/chosen": -1.6115087270736694, "logps/rejected": -2.83526611328125, "loss": 0.7551, "nll_loss": 0.7260342240333557, "rewards/accuracies": 1.0, "rewards/chosen": -0.16115087270736694, "rewards/margins": 0.12237576395273209, "rewards/rejected": -0.2835266590118408, "step": 1641 }, { "epoch": 4.4955509924709105, "grad_norm": 3.893275499343872, "learning_rate": 7.750684931506849e-07, "log_odds_chosen": 0.4257238507270813, "log_odds_ratio": -0.7115960121154785, "logits/chosen": 1.0420305728912354, "logits/rejected": 1.0353689193725586, "logps/chosen": -2.3628664016723633, "logps/rejected": -2.692875862121582, "loss": 0.8243, "nll_loss": 0.7531185150146484, "rewards/accuracies": 0.75, "rewards/chosen": -0.23628666996955872, "rewards/margins": 0.03300092741847038, "rewards/rejected": -0.2692875862121582, "step": 1642 }, { "epoch": 4.498288843258043, "grad_norm": 5.059042453765869, "learning_rate": 7.749315068493151e-07, "log_odds_chosen": 1.0553795099258423, "log_odds_ratio": -0.4780202805995941, "logits/chosen": 0.8912715315818787, "logits/rejected": 0.9784672260284424, "logps/chosen": -2.535210371017456, "logps/rejected": -3.4494028091430664, "loss": 0.8561, "nll_loss": 0.8082851767539978, "rewards/accuracies": 0.75, "rewards/chosen": -0.25352105498313904, "rewards/margins": 0.09141924977302551, "rewards/rejected": -0.34494030475616455, "step": 1643 }, { "epoch": 4.501026694045175, "grad_norm": 2.546780586242676, "learning_rate": 7.747945205479452e-07, "log_odds_chosen": 2.3952033519744873, "log_odds_ratio": -0.305200457572937, "logits/chosen": 0.8874465227127075, "logits/rejected": 0.8920258283615112, "logps/chosen": -2.8338775634765625, "logps/rejected": -5.121834754943848, "loss": 0.7974, "nll_loss": 0.7668706774711609, "rewards/accuracies": 0.875, "rewards/chosen": -0.2833877205848694, "rewards/margins": 0.22879578173160553, "rewards/rejected": -0.5121835470199585, "step": 1644 }, { "epoch": 4.503764544832307, "grad_norm": 2.071070432662964, "learning_rate": 7.746575342465753e-07, "log_odds_chosen": 2.7134432792663574, "log_odds_ratio": -0.22799040377140045, "logits/chosen": 0.8680207133293152, "logits/rejected": 0.8605576157569885, "logps/chosen": -1.6154097318649292, "logps/rejected": -4.124527931213379, "loss": 0.7884, "nll_loss": 0.7655837535858154, "rewards/accuracies": 1.0, "rewards/chosen": -0.16154098510742188, "rewards/margins": 0.2509118318557739, "rewards/rejected": -0.4124528169631958, "step": 1645 }, { "epoch": 4.506502395619439, "grad_norm": 2.0178678035736084, "learning_rate": 7.745205479452055e-07, "log_odds_chosen": 3.04711651802063, "log_odds_ratio": -0.14939787983894348, "logits/chosen": 0.6854617595672607, "logits/rejected": 0.5963298082351685, "logps/chosen": -2.2547550201416016, "logps/rejected": -5.145550727844238, "loss": 0.7819, "nll_loss": 0.7670082449913025, "rewards/accuracies": 1.0, "rewards/chosen": -0.2254755198955536, "rewards/margins": 0.2890795171260834, "rewards/rejected": -0.514555037021637, "step": 1646 }, { "epoch": 4.509240246406571, "grad_norm": 2.8830935955047607, "learning_rate": 7.743835616438356e-07, "log_odds_chosen": 1.343597412109375, "log_odds_ratio": -0.36163121461868286, "logits/chosen": 0.5550650358200073, "logits/rejected": 0.4592890441417694, "logps/chosen": -2.0815210342407227, "logps/rejected": -3.320206880569458, "loss": 0.8772, "nll_loss": 0.8410202264785767, "rewards/accuracies": 0.875, "rewards/chosen": -0.20815211534500122, "rewards/margins": 0.12386859953403473, "rewards/rejected": -0.33202069997787476, "step": 1647 }, { "epoch": 4.511978097193703, "grad_norm": 2.4844088554382324, "learning_rate": 7.742465753424657e-07, "log_odds_chosen": 1.0829824209213257, "log_odds_ratio": -0.4284946918487549, "logits/chosen": 0.5361377000808716, "logits/rejected": 0.5030803084373474, "logps/chosen": -1.9597113132476807, "logps/rejected": -2.9371979236602783, "loss": 0.9005, "nll_loss": 0.8576307892799377, "rewards/accuracies": 0.875, "rewards/chosen": -0.19597113132476807, "rewards/margins": 0.09774866700172424, "rewards/rejected": -0.2937198281288147, "step": 1648 }, { "epoch": 4.514715947980835, "grad_norm": 2.4161972999572754, "learning_rate": 7.741095890410958e-07, "log_odds_chosen": 2.842989921569824, "log_odds_ratio": -0.2574893832206726, "logits/chosen": 0.8891502618789673, "logits/rejected": 0.8969066143035889, "logps/chosen": -2.0367891788482666, "logps/rejected": -4.701155185699463, "loss": 0.763, "nll_loss": 0.7372724413871765, "rewards/accuracies": 0.875, "rewards/chosen": -0.2036789208650589, "rewards/margins": 0.2664366364479065, "rewards/rejected": -0.4701155722141266, "step": 1649 }, { "epoch": 4.517453798767967, "grad_norm": 3.4950222969055176, "learning_rate": 7.73972602739726e-07, "log_odds_chosen": 1.5535166263580322, "log_odds_ratio": -0.3695606589317322, "logits/chosen": 0.7088707685470581, "logits/rejected": 0.6615040302276611, "logps/chosen": -1.801243782043457, "logps/rejected": -3.198033332824707, "loss": 0.7634, "nll_loss": 0.726426362991333, "rewards/accuracies": 0.875, "rewards/chosen": -0.18012438714504242, "rewards/margins": 0.1396789699792862, "rewards/rejected": -0.3198033571243286, "step": 1650 }, { "epoch": 4.520191649555099, "grad_norm": 2.6692895889282227, "learning_rate": 7.738356164383562e-07, "log_odds_chosen": 1.6880578994750977, "log_odds_ratio": -0.2578483521938324, "logits/chosen": 1.08878755569458, "logits/rejected": 1.0982948541641235, "logps/chosen": -2.528655529022217, "logps/rejected": -4.129507064819336, "loss": 0.7269, "nll_loss": 0.7011623382568359, "rewards/accuracies": 1.0, "rewards/chosen": -0.25286558270454407, "rewards/margins": 0.16008514165878296, "rewards/rejected": -0.41295069456100464, "step": 1651 }, { "epoch": 4.522929500342231, "grad_norm": 2.4205336570739746, "learning_rate": 7.736986301369862e-07, "log_odds_chosen": 1.457419991493225, "log_odds_ratio": -0.2877456843852997, "logits/chosen": 0.4493561387062073, "logits/rejected": 0.32816287875175476, "logps/chosen": -1.8869965076446533, "logps/rejected": -3.1682727336883545, "loss": 0.8368, "nll_loss": 0.8080233931541443, "rewards/accuracies": 1.0, "rewards/chosen": -0.1886996328830719, "rewards/margins": 0.12812763452529907, "rewards/rejected": -0.31682726740837097, "step": 1652 }, { "epoch": 4.525667351129363, "grad_norm": 2.6508395671844482, "learning_rate": 7.735616438356164e-07, "log_odds_chosen": 1.167739987373352, "log_odds_ratio": -0.40786999464035034, "logits/chosen": 0.6044008731842041, "logits/rejected": 0.4928143322467804, "logps/chosen": -1.637674331665039, "logps/rejected": -2.696620464324951, "loss": 0.7947, "nll_loss": 0.7539501786231995, "rewards/accuracies": 0.875, "rewards/chosen": -0.16376744210720062, "rewards/margins": 0.10589459538459778, "rewards/rejected": -0.2696620523929596, "step": 1653 }, { "epoch": 4.528405201916495, "grad_norm": 2.5205836296081543, "learning_rate": 7.734246575342466e-07, "log_odds_chosen": 1.9199891090393066, "log_odds_ratio": -0.29555824398994446, "logits/chosen": 0.8415673971176147, "logits/rejected": 0.7271163463592529, "logps/chosen": -2.6788010597229004, "logps/rejected": -4.49682092666626, "loss": 0.7617, "nll_loss": 0.7321704626083374, "rewards/accuracies": 0.875, "rewards/chosen": -0.2678801119327545, "rewards/margins": 0.18180200457572937, "rewards/rejected": -0.4496821463108063, "step": 1654 }, { "epoch": 4.531143052703627, "grad_norm": 2.4475512504577637, "learning_rate": 7.732876712328766e-07, "log_odds_chosen": 2.6527600288391113, "log_odds_ratio": -0.2708771526813507, "logits/chosen": 1.0246084928512573, "logits/rejected": 1.0252271890640259, "logps/chosen": -2.341583013534546, "logps/rejected": -4.902139663696289, "loss": 0.792, "nll_loss": 0.7648994326591492, "rewards/accuracies": 0.75, "rewards/chosen": -0.23415830731391907, "rewards/margins": 0.25605568289756775, "rewards/rejected": -0.49021396040916443, "step": 1655 }, { "epoch": 4.5338809034907595, "grad_norm": 2.699272632598877, "learning_rate": 7.731506849315068e-07, "log_odds_chosen": 1.687288761138916, "log_odds_ratio": -0.36290112137794495, "logits/chosen": 0.8981690406799316, "logits/rejected": 0.8563491106033325, "logps/chosen": -2.0593249797821045, "logps/rejected": -3.6841659545898438, "loss": 0.8349, "nll_loss": 0.7986437082290649, "rewards/accuracies": 1.0, "rewards/chosen": -0.20593249797821045, "rewards/margins": 0.1624840795993805, "rewards/rejected": -0.36841657757759094, "step": 1656 }, { "epoch": 4.5366187542778915, "grad_norm": 2.946866512298584, "learning_rate": 7.73013698630137e-07, "log_odds_chosen": 2.560053825378418, "log_odds_ratio": -0.3164827823638916, "logits/chosen": 0.991489052772522, "logits/rejected": 0.9512693285942078, "logps/chosen": -2.357470750808716, "logps/rejected": -4.8127665519714355, "loss": 0.782, "nll_loss": 0.7503580451011658, "rewards/accuracies": 0.875, "rewards/chosen": -0.2357470989227295, "rewards/margins": 0.24552956223487854, "rewards/rejected": -0.48127666115760803, "step": 1657 }, { "epoch": 4.539356605065024, "grad_norm": 2.453578233718872, "learning_rate": 7.72876712328767e-07, "log_odds_chosen": 1.3722577095031738, "log_odds_ratio": -0.26776981353759766, "logits/chosen": 0.8059864044189453, "logits/rejected": 0.7446937561035156, "logps/chosen": -1.6852681636810303, "logps/rejected": -2.8593010902404785, "loss": 0.7635, "nll_loss": 0.7366791367530823, "rewards/accuracies": 1.0, "rewards/chosen": -0.1685268133878708, "rewards/margins": 0.11740328371524811, "rewards/rejected": -0.2859300971031189, "step": 1658 }, { "epoch": 4.5420944558521565, "grad_norm": 3.2209579944610596, "learning_rate": 7.727397260273972e-07, "log_odds_chosen": 1.193603754043579, "log_odds_ratio": -0.5519527196884155, "logits/chosen": 0.7233839631080627, "logits/rejected": 0.6709727644920349, "logps/chosen": -2.172698497772217, "logps/rejected": -3.321122169494629, "loss": 0.8667, "nll_loss": 0.8115254640579224, "rewards/accuracies": 0.625, "rewards/chosen": -0.2172698676586151, "rewards/margins": 0.11484233289957047, "rewards/rejected": -0.33211222290992737, "step": 1659 }, { "epoch": 4.544832306639288, "grad_norm": 2.7268974781036377, "learning_rate": 7.726027397260274e-07, "log_odds_chosen": 1.9325814247131348, "log_odds_ratio": -0.25187650322914124, "logits/chosen": 0.8312755823135376, "logits/rejected": 0.793785035610199, "logps/chosen": -2.0888819694519043, "logps/rejected": -3.8915538787841797, "loss": 0.8681, "nll_loss": 0.8428675532341003, "rewards/accuracies": 1.0, "rewards/chosen": -0.20888817310333252, "rewards/margins": 0.18026724457740784, "rewards/rejected": -0.38915541768074036, "step": 1660 }, { "epoch": 4.547570157426421, "grad_norm": 2.7473583221435547, "learning_rate": 7.724657534246575e-07, "log_odds_chosen": 2.758127450942993, "log_odds_ratio": -0.2063741385936737, "logits/chosen": 0.8738706111907959, "logits/rejected": 0.8697630167007446, "logps/chosen": -2.694406032562256, "logps/rejected": -5.2997870445251465, "loss": 0.8473, "nll_loss": 0.826706051826477, "rewards/accuracies": 1.0, "rewards/chosen": -0.269440621137619, "rewards/margins": 0.2605380713939667, "rewards/rejected": -0.5299786925315857, "step": 1661 }, { "epoch": 4.550308008213553, "grad_norm": 2.411618709564209, "learning_rate": 7.723287671232876e-07, "log_odds_chosen": 2.201575994491577, "log_odds_ratio": -0.2119419276714325, "logits/chosen": 1.1414072513580322, "logits/rejected": 1.1371885538101196, "logps/chosen": -2.10345196723938, "logps/rejected": -4.202047348022461, "loss": 0.7001, "nll_loss": 0.6789522171020508, "rewards/accuracies": 0.875, "rewards/chosen": -0.21034520864486694, "rewards/margins": 0.20985952019691467, "rewards/rejected": -0.420204758644104, "step": 1662 }, { "epoch": 4.553045859000685, "grad_norm": 2.6905272006988525, "learning_rate": 7.721917808219178e-07, "log_odds_chosen": 1.896777868270874, "log_odds_ratio": -0.2700769901275635, "logits/chosen": 1.0444636344909668, "logits/rejected": 1.0139387845993042, "logps/chosen": -2.3644630908966064, "logps/rejected": -4.1459431648254395, "loss": 0.7437, "nll_loss": 0.7167230844497681, "rewards/accuracies": 0.875, "rewards/chosen": -0.2364463061094284, "rewards/margins": 0.17814800143241882, "rewards/rejected": -0.41459429264068604, "step": 1663 }, { "epoch": 4.555783709787817, "grad_norm": 1.9723800420761108, "learning_rate": 7.720547945205479e-07, "log_odds_chosen": 2.089729070663452, "log_odds_ratio": -0.23803170025348663, "logits/chosen": 0.6409896016120911, "logits/rejected": 0.625113844871521, "logps/chosen": -1.5621426105499268, "logps/rejected": -3.484882354736328, "loss": 0.817, "nll_loss": 0.7932284474372864, "rewards/accuracies": 1.0, "rewards/chosen": -0.15621426701545715, "rewards/margins": 0.19227397441864014, "rewards/rejected": -0.3484882414340973, "step": 1664 }, { "epoch": 4.558521560574949, "grad_norm": 2.056476354598999, "learning_rate": 7.71917808219178e-07, "log_odds_chosen": 1.8431004285812378, "log_odds_ratio": -0.26734769344329834, "logits/chosen": 0.8639585971832275, "logits/rejected": 0.7670419216156006, "logps/chosen": -1.9144424200057983, "logps/rejected": -3.598318099975586, "loss": 0.8784, "nll_loss": 0.8516340255737305, "rewards/accuracies": 1.0, "rewards/chosen": -0.1914442479610443, "rewards/margins": 0.16838756203651428, "rewards/rejected": -0.3598318099975586, "step": 1665 }, { "epoch": 4.561259411362081, "grad_norm": 4.543501853942871, "learning_rate": 7.717808219178081e-07, "log_odds_chosen": 0.9173898696899414, "log_odds_ratio": -0.7235804200172424, "logits/chosen": 1.0086684226989746, "logits/rejected": 1.0832537412643433, "logps/chosen": -2.801114320755005, "logps/rejected": -3.584186553955078, "loss": 0.7118, "nll_loss": 0.6394808888435364, "rewards/accuracies": 0.625, "rewards/chosen": -0.2801114320755005, "rewards/margins": 0.07830724120140076, "rewards/rejected": -0.35841867327690125, "step": 1666 }, { "epoch": 4.563997262149213, "grad_norm": 3.4825806617736816, "learning_rate": 7.716438356164383e-07, "log_odds_chosen": 0.6792789697647095, "log_odds_ratio": -0.44811996817588806, "logits/chosen": 0.6013016104698181, "logits/rejected": 0.5384812355041504, "logps/chosen": -2.162715196609497, "logps/rejected": -2.7128915786743164, "loss": 0.8085, "nll_loss": 0.7637374401092529, "rewards/accuracies": 0.875, "rewards/chosen": -0.2162715196609497, "rewards/margins": 0.055017635226249695, "rewards/rejected": -0.2712891697883606, "step": 1667 }, { "epoch": 4.566735112936345, "grad_norm": 2.394700050354004, "learning_rate": 7.715068493150685e-07, "log_odds_chosen": 1.2831017971038818, "log_odds_ratio": -0.3193553388118744, "logits/chosen": 0.5441027283668518, "logits/rejected": 0.4808044731616974, "logps/chosen": -2.182380437850952, "logps/rejected": -3.348004102706909, "loss": 0.886, "nll_loss": 0.854092001914978, "rewards/accuracies": 1.0, "rewards/chosen": -0.21823802590370178, "rewards/margins": 0.11656240373849869, "rewards/rejected": -0.33480045199394226, "step": 1668 }, { "epoch": 4.569472963723477, "grad_norm": 4.293591499328613, "learning_rate": 7.713698630136985e-07, "log_odds_chosen": 1.7117416858673096, "log_odds_ratio": -0.24478641152381897, "logits/chosen": 1.1662687063217163, "logits/rejected": 1.1763710975646973, "logps/chosen": -2.5799379348754883, "logps/rejected": -4.154772758483887, "loss": 0.747, "nll_loss": 0.7225143909454346, "rewards/accuracies": 1.0, "rewards/chosen": -0.25799381732940674, "rewards/margins": 0.15748350322246552, "rewards/rejected": -0.41547730565071106, "step": 1669 }, { "epoch": 4.572210814510609, "grad_norm": 2.5229318141937256, "learning_rate": 7.712328767123287e-07, "log_odds_chosen": 0.9277072548866272, "log_odds_ratio": -0.4438379406929016, "logits/chosen": 1.0251493453979492, "logits/rejected": 0.9806964993476868, "logps/chosen": -2.317762851715088, "logps/rejected": -3.1711366176605225, "loss": 0.7379, "nll_loss": 0.6935554146766663, "rewards/accuracies": 0.875, "rewards/chosen": -0.23177629709243774, "rewards/margins": 0.08533737808465958, "rewards/rejected": -0.3171136677265167, "step": 1670 }, { "epoch": 4.574948665297741, "grad_norm": 3.0661308765411377, "learning_rate": 7.710958904109589e-07, "log_odds_chosen": 2.2534422874450684, "log_odds_ratio": -0.288344144821167, "logits/chosen": 1.1430588960647583, "logits/rejected": 1.2020316123962402, "logps/chosen": -2.55855655670166, "logps/rejected": -4.687041282653809, "loss": 0.7043, "nll_loss": 0.6754447221755981, "rewards/accuracies": 0.875, "rewards/chosen": -0.2558556795120239, "rewards/margins": 0.2128484547138214, "rewards/rejected": -0.46870410442352295, "step": 1671 }, { "epoch": 4.577686516084873, "grad_norm": 2.8228728771209717, "learning_rate": 7.709589041095889e-07, "log_odds_chosen": 1.623754620552063, "log_odds_ratio": -0.23710256814956665, "logits/chosen": 0.6819545030593872, "logits/rejected": 0.6703583598136902, "logps/chosen": -1.6899359226226807, "logps/rejected": -3.0906081199645996, "loss": 0.7753, "nll_loss": 0.7515460252761841, "rewards/accuracies": 1.0, "rewards/chosen": -0.16899359226226807, "rewards/margins": 0.1400672346353531, "rewards/rejected": -0.30906081199645996, "step": 1672 }, { "epoch": 4.5804243668720055, "grad_norm": 2.5136308670043945, "learning_rate": 7.708219178082191e-07, "log_odds_chosen": 1.5874521732330322, "log_odds_ratio": -0.3200458884239197, "logits/chosen": 0.791738748550415, "logits/rejected": 0.723301112651825, "logps/chosen": -2.2497856616973877, "logps/rejected": -3.7347495555877686, "loss": 0.7942, "nll_loss": 0.7622164487838745, "rewards/accuracies": 1.0, "rewards/chosen": -0.22497856616973877, "rewards/margins": 0.1484963744878769, "rewards/rejected": -0.37347495555877686, "step": 1673 }, { "epoch": 4.583162217659138, "grad_norm": 3.17069411277771, "learning_rate": 7.706849315068493e-07, "log_odds_chosen": 2.706563949584961, "log_odds_ratio": -0.4305022656917572, "logits/chosen": 0.8442689180374146, "logits/rejected": 0.8211698532104492, "logps/chosen": -2.373570203781128, "logps/rejected": -4.980380058288574, "loss": 0.8825, "nll_loss": 0.8394527435302734, "rewards/accuracies": 0.875, "rewards/chosen": -0.2373570203781128, "rewards/margins": 0.26068100333213806, "rewards/rejected": -0.49803802371025085, "step": 1674 }, { "epoch": 4.58590006844627, "grad_norm": 2.640676259994507, "learning_rate": 7.705479452054794e-07, "log_odds_chosen": 0.6454839706420898, "log_odds_ratio": -0.47339141368865967, "logits/chosen": 0.6478070020675659, "logits/rejected": 0.5950379371643066, "logps/chosen": -1.5875599384307861, "logps/rejected": -2.1467742919921875, "loss": 0.8231, "nll_loss": 0.7757739424705505, "rewards/accuracies": 0.875, "rewards/chosen": -0.15875600278377533, "rewards/margins": 0.05592144653201103, "rewards/rejected": -0.21467745304107666, "step": 1675 }, { "epoch": 4.588637919233402, "grad_norm": 2.6459789276123047, "learning_rate": 7.704109589041095e-07, "log_odds_chosen": 2.821314811706543, "log_odds_ratio": -0.30605873465538025, "logits/chosen": 1.0326261520385742, "logits/rejected": 1.048183560371399, "logps/chosen": -2.2195520401000977, "logps/rejected": -4.935995578765869, "loss": 0.8056, "nll_loss": 0.7749776840209961, "rewards/accuracies": 0.875, "rewards/chosen": -0.22195522487163544, "rewards/margins": 0.27164435386657715, "rewards/rejected": -0.4935995936393738, "step": 1676 }, { "epoch": 4.591375770020534, "grad_norm": 2.7204787731170654, "learning_rate": 7.702739726027397e-07, "log_odds_chosen": 0.8408331871032715, "log_odds_ratio": -0.6689260601997375, "logits/chosen": 0.9759736061096191, "logits/rejected": 0.9697228670120239, "logps/chosen": -2.677445411682129, "logps/rejected": -3.477755546569824, "loss": 0.885, "nll_loss": 0.818064272403717, "rewards/accuracies": 0.625, "rewards/chosen": -0.2677445709705353, "rewards/margins": 0.08003097772598267, "rewards/rejected": -0.34777551889419556, "step": 1677 }, { "epoch": 4.594113620807666, "grad_norm": 2.09924578666687, "learning_rate": 7.701369863013698e-07, "log_odds_chosen": 2.9842371940612793, "log_odds_ratio": -0.2409607321023941, "logits/chosen": 1.0096555948257446, "logits/rejected": 0.9212539196014404, "logps/chosen": -1.8196375370025635, "logps/rejected": -4.652317047119141, "loss": 0.8249, "nll_loss": 0.8008327484130859, "rewards/accuracies": 0.875, "rewards/chosen": -0.1819637566804886, "rewards/margins": 0.28326791524887085, "rewards/rejected": -0.46523165702819824, "step": 1678 }, { "epoch": 4.596851471594798, "grad_norm": 2.933609962463379, "learning_rate": 7.699999999999999e-07, "log_odds_chosen": 0.9535225629806519, "log_odds_ratio": -0.46643921732902527, "logits/chosen": 0.7658718824386597, "logits/rejected": 0.7393801808357239, "logps/chosen": -2.3683106899261475, "logps/rejected": -3.203037738800049, "loss": 0.8594, "nll_loss": 0.812736988067627, "rewards/accuracies": 0.625, "rewards/chosen": -0.23683106899261475, "rewards/margins": 0.08347268402576447, "rewards/rejected": -0.3203037679195404, "step": 1679 }, { "epoch": 4.59958932238193, "grad_norm": 3.6691527366638184, "learning_rate": 7.6986301369863e-07, "log_odds_chosen": 2.0262954235076904, "log_odds_ratio": -0.45073941349983215, "logits/chosen": 0.6096025705337524, "logits/rejected": 0.608546257019043, "logps/chosen": -2.137831926345825, "logps/rejected": -3.8818373680114746, "loss": 0.8031, "nll_loss": 0.7580597400665283, "rewards/accuracies": 0.875, "rewards/chosen": -0.21378320455551147, "rewards/margins": 0.17440055310726166, "rewards/rejected": -0.38818374276161194, "step": 1680 }, { "epoch": 4.602327173169062, "grad_norm": 2.1188650131225586, "learning_rate": 7.697260273972602e-07, "log_odds_chosen": 2.423102855682373, "log_odds_ratio": -0.27771174907684326, "logits/chosen": 0.9953500032424927, "logits/rejected": 1.0017175674438477, "logps/chosen": -2.1873435974121094, "logps/rejected": -4.509706497192383, "loss": 0.7415, "nll_loss": 0.7136789560317993, "rewards/accuracies": 0.75, "rewards/chosen": -0.21873435378074646, "rewards/margins": 0.23223631083965302, "rewards/rejected": -0.45097067952156067, "step": 1681 }, { "epoch": 4.605065023956194, "grad_norm": 2.1476528644561768, "learning_rate": 7.695890410958904e-07, "log_odds_chosen": 1.5462236404418945, "log_odds_ratio": -0.32587704062461853, "logits/chosen": 0.5757254362106323, "logits/rejected": 0.5783795118331909, "logps/chosen": -1.7427349090576172, "logps/rejected": -3.169825315475464, "loss": 0.8777, "nll_loss": 0.8450638651847839, "rewards/accuracies": 1.0, "rewards/chosen": -0.17427349090576172, "rewards/margins": 0.14270903170108795, "rewards/rejected": -0.3169825077056885, "step": 1682 }, { "epoch": 4.607802874743326, "grad_norm": 2.4922285079956055, "learning_rate": 7.694520547945204e-07, "log_odds_chosen": 0.8570510149002075, "log_odds_ratio": -0.6282563805580139, "logits/chosen": 0.7394979000091553, "logits/rejected": 0.7296879887580872, "logps/chosen": -1.8577909469604492, "logps/rejected": -2.7135701179504395, "loss": 0.8866, "nll_loss": 0.8237906694412231, "rewards/accuracies": 0.5, "rewards/chosen": -0.18577909469604492, "rewards/margins": 0.08557794243097305, "rewards/rejected": -0.2713570296764374, "step": 1683 }, { "epoch": 4.610540725530458, "grad_norm": 4.441031455993652, "learning_rate": 7.693150684931506e-07, "log_odds_chosen": 1.0712171792984009, "log_odds_ratio": -0.4283849000930786, "logits/chosen": 0.8596657514572144, "logits/rejected": 0.8272309303283691, "logps/chosen": -2.3173327445983887, "logps/rejected": -3.2767679691314697, "loss": 0.8447, "nll_loss": 0.8018269538879395, "rewards/accuracies": 0.875, "rewards/chosen": -0.2317332923412323, "rewards/margins": 0.09594354033470154, "rewards/rejected": -0.32767683267593384, "step": 1684 }, { "epoch": 4.61327857631759, "grad_norm": 2.2871410846710205, "learning_rate": 7.691780821917808e-07, "log_odds_chosen": 2.4746155738830566, "log_odds_ratio": -0.24570664763450623, "logits/chosen": 0.8727943301200867, "logits/rejected": 0.8528779745101929, "logps/chosen": -2.067615270614624, "logps/rejected": -4.392459869384766, "loss": 0.7775, "nll_loss": 0.7529303431510925, "rewards/accuracies": 1.0, "rewards/chosen": -0.20676152408123016, "rewards/margins": 0.23248443007469177, "rewards/rejected": -0.4392459988594055, "step": 1685 }, { "epoch": 4.616016427104723, "grad_norm": 2.3362650871276855, "learning_rate": 7.690410958904108e-07, "log_odds_chosen": 1.8276875019073486, "log_odds_ratio": -0.2660280168056488, "logits/chosen": 0.697452187538147, "logits/rejected": 0.6789982914924622, "logps/chosen": -1.7336511611938477, "logps/rejected": -3.387352228164673, "loss": 0.8187, "nll_loss": 0.7920939326286316, "rewards/accuracies": 1.0, "rewards/chosen": -0.17336511611938477, "rewards/margins": 0.16537010669708252, "rewards/rejected": -0.3387352228164673, "step": 1686 }, { "epoch": 4.618754277891854, "grad_norm": 2.6582772731781006, "learning_rate": 7.68904109589041e-07, "log_odds_chosen": 1.1347957849502563, "log_odds_ratio": -0.3764761686325073, "logits/chosen": 0.6695644855499268, "logits/rejected": 0.5853208303451538, "logps/chosen": -1.7896409034729004, "logps/rejected": -2.797933340072632, "loss": 0.9065, "nll_loss": 0.8688524961471558, "rewards/accuracies": 0.875, "rewards/chosen": -0.17896407842636108, "rewards/margins": 0.10082924365997314, "rewards/rejected": -0.27979332208633423, "step": 1687 }, { "epoch": 4.621492128678987, "grad_norm": 2.239619016647339, "learning_rate": 7.687671232876712e-07, "log_odds_chosen": 1.2109439373016357, "log_odds_ratio": -0.32274916768074036, "logits/chosen": 0.6859834790229797, "logits/rejected": 0.589412271976471, "logps/chosen": -2.246026039123535, "logps/rejected": -3.364093780517578, "loss": 0.8348, "nll_loss": 0.8025628328323364, "rewards/accuracies": 1.0, "rewards/chosen": -0.224602609872818, "rewards/margins": 0.11180678009986877, "rewards/rejected": -0.33640938997268677, "step": 1688 }, { "epoch": 4.6242299794661195, "grad_norm": 3.0695605278015137, "learning_rate": 7.686301369863013e-07, "log_odds_chosen": 0.869003176689148, "log_odds_ratio": -0.4562307894229889, "logits/chosen": 0.7943333983421326, "logits/rejected": 0.8033030033111572, "logps/chosen": -2.667289972305298, "logps/rejected": -3.4344420433044434, "loss": 0.7675, "nll_loss": 0.7218692898750305, "rewards/accuracies": 0.75, "rewards/chosen": -0.2667289972305298, "rewards/margins": 0.07671518623828888, "rewards/rejected": -0.34344419836997986, "step": 1689 }, { "epoch": 4.6269678302532515, "grad_norm": 1.8872573375701904, "learning_rate": 7.684931506849314e-07, "log_odds_chosen": 2.9712605476379395, "log_odds_ratio": -0.1657249629497528, "logits/chosen": 0.9818606972694397, "logits/rejected": 0.9338613748550415, "logps/chosen": -1.855047583580017, "logps/rejected": -4.637012481689453, "loss": 0.7781, "nll_loss": 0.7614905834197998, "rewards/accuracies": 1.0, "rewards/chosen": -0.18550477921962738, "rewards/margins": 0.27819645404815674, "rewards/rejected": -0.4637012481689453, "step": 1690 }, { "epoch": 4.629705681040384, "grad_norm": 2.2441065311431885, "learning_rate": 7.683561643835617e-07, "log_odds_chosen": 2.2645325660705566, "log_odds_ratio": -0.2586840093135834, "logits/chosen": 0.7877151966094971, "logits/rejected": 0.7969861030578613, "logps/chosen": -1.8731895685195923, "logps/rejected": -3.959768295288086, "loss": 0.8126, "nll_loss": 0.7867330312728882, "rewards/accuracies": 1.0, "rewards/chosen": -0.18731895089149475, "rewards/margins": 0.208657905459404, "rewards/rejected": -0.39597684144973755, "step": 1691 }, { "epoch": 4.632443531827516, "grad_norm": 2.463968515396118, "learning_rate": 7.682191780821918e-07, "log_odds_chosen": 1.7810990810394287, "log_odds_ratio": -0.3051481246948242, "logits/chosen": 0.5827440023422241, "logits/rejected": 0.49777162075042725, "logps/chosen": -1.4234907627105713, "logps/rejected": -3.0393991470336914, "loss": 0.7984, "nll_loss": 0.7678840756416321, "rewards/accuracies": 1.0, "rewards/chosen": -0.14234907925128937, "rewards/margins": 0.1615908294916153, "rewards/rejected": -0.30393990874290466, "step": 1692 }, { "epoch": 4.635181382614648, "grad_norm": 3.266535758972168, "learning_rate": 7.680821917808219e-07, "log_odds_chosen": 1.6693443059921265, "log_odds_ratio": -0.3609768748283386, "logits/chosen": 0.8184600472450256, "logits/rejected": 0.7301950454711914, "logps/chosen": -1.8932297229766846, "logps/rejected": -3.3804712295532227, "loss": 0.8391, "nll_loss": 0.803016722202301, "rewards/accuracies": 1.0, "rewards/chosen": -0.18932297825813293, "rewards/margins": 0.14872412383556366, "rewards/rejected": -0.3380470871925354, "step": 1693 }, { "epoch": 4.63791923340178, "grad_norm": 2.748448610305786, "learning_rate": 7.679452054794521e-07, "log_odds_chosen": 1.2758541107177734, "log_odds_ratio": -0.4789772033691406, "logits/chosen": 0.7494779825210571, "logits/rejected": 0.7500258088111877, "logps/chosen": -2.297215223312378, "logps/rejected": -3.51505446434021, "loss": 0.8703, "nll_loss": 0.822396993637085, "rewards/accuracies": 0.75, "rewards/chosen": -0.22972151637077332, "rewards/margins": 0.12178393453359604, "rewards/rejected": -0.35150545835494995, "step": 1694 }, { "epoch": 4.640657084188912, "grad_norm": 2.311966896057129, "learning_rate": 7.678082191780822e-07, "log_odds_chosen": 0.9437525868415833, "log_odds_ratio": -0.3818976879119873, "logits/chosen": 0.7144303321838379, "logits/rejected": 0.6558924913406372, "logps/chosen": -1.6804553270339966, "logps/rejected": -2.4937222003936768, "loss": 0.8206, "nll_loss": 0.7823702096939087, "rewards/accuracies": 0.875, "rewards/chosen": -0.1680455356836319, "rewards/margins": 0.0813266932964325, "rewards/rejected": -0.2493722289800644, "step": 1695 }, { "epoch": 4.643394934976044, "grad_norm": 4.118440628051758, "learning_rate": 7.676712328767124e-07, "log_odds_chosen": 0.8166524171829224, "log_odds_ratio": -0.6962124109268188, "logits/chosen": 0.47938403487205505, "logits/rejected": 0.39424991607666016, "logps/chosen": -2.1800708770751953, "logps/rejected": -2.8959553241729736, "loss": 0.9244, "nll_loss": 0.8547460436820984, "rewards/accuracies": 0.75, "rewards/chosen": -0.21800708770751953, "rewards/margins": 0.07158844918012619, "rewards/rejected": -0.2895955443382263, "step": 1696 }, { "epoch": 4.646132785763176, "grad_norm": 3.13455867767334, "learning_rate": 7.675342465753424e-07, "log_odds_chosen": 0.9389269351959229, "log_odds_ratio": -0.6254783272743225, "logits/chosen": 0.9079597592353821, "logits/rejected": 0.8535298705101013, "logps/chosen": -2.8470652103424072, "logps/rejected": -3.763237953186035, "loss": 0.9125, "nll_loss": 0.8499044179916382, "rewards/accuracies": 0.625, "rewards/chosen": -0.2847065329551697, "rewards/margins": 0.09161725640296936, "rewards/rejected": -0.37632375955581665, "step": 1697 }, { "epoch": 4.648870636550308, "grad_norm": 2.1565053462982178, "learning_rate": 7.673972602739726e-07, "log_odds_chosen": 1.180728554725647, "log_odds_ratio": -0.371354341506958, "logits/chosen": 0.7710468173027039, "logits/rejected": 0.714739978313446, "logps/chosen": -1.5759183168411255, "logps/rejected": -2.6256017684936523, "loss": 0.7735, "nll_loss": 0.7363187074661255, "rewards/accuracies": 0.875, "rewards/chosen": -0.15759184956550598, "rewards/margins": 0.10496833920478821, "rewards/rejected": -0.2625601887702942, "step": 1698 }, { "epoch": 4.65160848733744, "grad_norm": 2.768203020095825, "learning_rate": 7.672602739726028e-07, "log_odds_chosen": 0.9726216793060303, "log_odds_ratio": -0.3642117977142334, "logits/chosen": 0.9817322492599487, "logits/rejected": 0.9399847388267517, "logps/chosen": -2.0497524738311768, "logps/rejected": -2.923801898956299, "loss": 0.7608, "nll_loss": 0.7243906259536743, "rewards/accuracies": 1.0, "rewards/chosen": -0.20497524738311768, "rewards/margins": 0.08740497380495071, "rewards/rejected": -0.2923802137374878, "step": 1699 }, { "epoch": 4.654346338124572, "grad_norm": 2.811544418334961, "learning_rate": 7.671232876712328e-07, "log_odds_chosen": 2.0160350799560547, "log_odds_ratio": -0.2006724327802658, "logits/chosen": 0.8509256839752197, "logits/rejected": 0.8617376089096069, "logps/chosen": -1.7748535871505737, "logps/rejected": -3.5915071964263916, "loss": 0.8349, "nll_loss": 0.814875066280365, "rewards/accuracies": 1.0, "rewards/chosen": -0.17748534679412842, "rewards/margins": 0.1816653609275818, "rewards/rejected": -0.3591507077217102, "step": 1700 }, { "epoch": 4.657084188911704, "grad_norm": 3.003607749938965, "learning_rate": 7.66986301369863e-07, "log_odds_chosen": 1.5853384733200073, "log_odds_ratio": -0.281663715839386, "logits/chosen": 0.9854801893234253, "logits/rejected": 0.9845505356788635, "logps/chosen": -2.9075119495391846, "logps/rejected": -4.382072448730469, "loss": 0.7386, "nll_loss": 0.7103880047798157, "rewards/accuracies": 1.0, "rewards/chosen": -0.29075124859809875, "rewards/margins": 0.14745602011680603, "rewards/rejected": -0.4382071793079376, "step": 1701 }, { "epoch": 4.659822039698836, "grad_norm": 3.037649154663086, "learning_rate": 7.668493150684932e-07, "log_odds_chosen": 1.6528348922729492, "log_odds_ratio": -0.3116680383682251, "logits/chosen": 1.0824627876281738, "logits/rejected": 1.088281273841858, "logps/chosen": -2.3311784267425537, "logps/rejected": -3.7812955379486084, "loss": 0.6901, "nll_loss": 0.6588946580886841, "rewards/accuracies": 0.875, "rewards/chosen": -0.23311786353588104, "rewards/margins": 0.14501169323921204, "rewards/rejected": -0.3781295716762543, "step": 1702 }, { "epoch": 4.662559890485968, "grad_norm": 3.249427556991577, "learning_rate": 7.667123287671233e-07, "log_odds_chosen": 1.675828456878662, "log_odds_ratio": -0.5062876343727112, "logits/chosen": 0.6857750415802002, "logits/rejected": 0.7067500352859497, "logps/chosen": -3.57614803314209, "logps/rejected": -5.2119221687316895, "loss": 0.8635, "nll_loss": 0.8128416538238525, "rewards/accuracies": 0.75, "rewards/chosen": -0.3576148450374603, "rewards/margins": 0.1635773777961731, "rewards/rejected": -0.521192193031311, "step": 1703 }, { "epoch": 4.6652977412731005, "grad_norm": 4.491857051849365, "learning_rate": 7.665753424657534e-07, "log_odds_chosen": 1.0724289417266846, "log_odds_ratio": -0.7280611395835876, "logits/chosen": 0.7385209202766418, "logits/rejected": 0.707283616065979, "logps/chosen": -2.2958288192749023, "logps/rejected": -3.303349018096924, "loss": 0.9032, "nll_loss": 0.8304151296615601, "rewards/accuracies": 0.75, "rewards/chosen": -0.22958290576934814, "rewards/margins": 0.10075202584266663, "rewards/rejected": -0.3303349018096924, "step": 1704 }, { "epoch": 4.6680355920602326, "grad_norm": 2.083249092102051, "learning_rate": 7.664383561643836e-07, "log_odds_chosen": 2.773941993713379, "log_odds_ratio": -0.16271448135375977, "logits/chosen": 0.9163010120391846, "logits/rejected": 0.9010173678398132, "logps/chosen": -1.749251365661621, "logps/rejected": -4.348627090454102, "loss": 0.7494, "nll_loss": 0.733115553855896, "rewards/accuracies": 1.0, "rewards/chosen": -0.17492514848709106, "rewards/margins": 0.259937584400177, "rewards/rejected": -0.4348627030849457, "step": 1705 }, { "epoch": 4.670773442847365, "grad_norm": 2.4155166149139404, "learning_rate": 7.663013698630137e-07, "log_odds_chosen": 1.927530288696289, "log_odds_ratio": -0.22734329104423523, "logits/chosen": 0.6923637986183167, "logits/rejected": 0.6482526063919067, "logps/chosen": -1.3939663171768188, "logps/rejected": -3.1039018630981445, "loss": 0.7768, "nll_loss": 0.7540924549102783, "rewards/accuracies": 1.0, "rewards/chosen": -0.13939663767814636, "rewards/margins": 0.17099355161190033, "rewards/rejected": -0.3103901743888855, "step": 1706 }, { "epoch": 4.673511293634497, "grad_norm": 2.586186408996582, "learning_rate": 7.661643835616438e-07, "log_odds_chosen": 1.564323902130127, "log_odds_ratio": -0.3046755790710449, "logits/chosen": 0.6824505925178528, "logits/rejected": 0.6955944895744324, "logps/chosen": -2.0550312995910645, "logps/rejected": -3.4677581787109375, "loss": 0.7649, "nll_loss": 0.7344608306884766, "rewards/accuracies": 0.875, "rewards/chosen": -0.20550313591957092, "rewards/margins": 0.14127269387245178, "rewards/rejected": -0.3467758297920227, "step": 1707 }, { "epoch": 4.676249144421629, "grad_norm": 2.6757924556732178, "learning_rate": 7.66027397260274e-07, "log_odds_chosen": 2.1194465160369873, "log_odds_ratio": -0.18417543172836304, "logits/chosen": 1.0224558115005493, "logits/rejected": 1.0323090553283691, "logps/chosen": -1.8652831315994263, "logps/rejected": -3.7445783615112305, "loss": 0.6984, "nll_loss": 0.6799799799919128, "rewards/accuracies": 1.0, "rewards/chosen": -0.18652832508087158, "rewards/margins": 0.18792951107025146, "rewards/rejected": -0.37445783615112305, "step": 1708 }, { "epoch": 4.678986995208761, "grad_norm": 2.344658851623535, "learning_rate": 7.658904109589041e-07, "log_odds_chosen": 1.5186426639556885, "log_odds_ratio": -0.28144657611846924, "logits/chosen": 0.8383961915969849, "logits/rejected": 0.8208203315734863, "logps/chosen": -2.0598955154418945, "logps/rejected": -3.4294440746307373, "loss": 0.8154, "nll_loss": 0.787266731262207, "rewards/accuracies": 0.875, "rewards/chosen": -0.2059895545244217, "rewards/margins": 0.1369548738002777, "rewards/rejected": -0.3429444432258606, "step": 1709 }, { "epoch": 4.681724845995893, "grad_norm": 3.381084680557251, "learning_rate": 7.657534246575343e-07, "log_odds_chosen": 0.4764753580093384, "log_odds_ratio": -0.9113661050796509, "logits/chosen": 0.5082531571388245, "logits/rejected": 0.484417587518692, "logps/chosen": -2.0555830001831055, "logps/rejected": -2.5019586086273193, "loss": 0.9801, "nll_loss": 0.8889869451522827, "rewards/accuracies": 0.75, "rewards/chosen": -0.20555830001831055, "rewards/margins": 0.04463755711913109, "rewards/rejected": -0.25019586086273193, "step": 1710 }, { "epoch": 4.684462696783025, "grad_norm": 5.31044864654541, "learning_rate": 7.656164383561643e-07, "log_odds_chosen": 0.3923037648200989, "log_odds_ratio": -0.7031712532043457, "logits/chosen": 0.8053627014160156, "logits/rejected": 0.7821846008300781, "logps/chosen": -2.9690136909484863, "logps/rejected": -3.2979393005371094, "loss": 0.8784, "nll_loss": 0.8081175684928894, "rewards/accuracies": 0.75, "rewards/chosen": -0.2969013452529907, "rewards/margins": 0.032892581075429916, "rewards/rejected": -0.32979393005371094, "step": 1711 }, { "epoch": 4.687200547570157, "grad_norm": 2.862917423248291, "learning_rate": 7.654794520547945e-07, "log_odds_chosen": 2.0017454624176025, "log_odds_ratio": -0.3961893320083618, "logits/chosen": 1.1425542831420898, "logits/rejected": 1.1244964599609375, "logps/chosen": -2.1230075359344482, "logps/rejected": -3.883680820465088, "loss": 0.7299, "nll_loss": 0.6903200745582581, "rewards/accuracies": 0.75, "rewards/chosen": -0.21230074763298035, "rewards/margins": 0.1760673224925995, "rewards/rejected": -0.3883680999279022, "step": 1712 }, { "epoch": 4.68993839835729, "grad_norm": 2.016965866088867, "learning_rate": 7.653424657534247e-07, "log_odds_chosen": 1.528618335723877, "log_odds_ratio": -0.3877922594547272, "logits/chosen": 0.7682095766067505, "logits/rejected": 0.5966244339942932, "logps/chosen": -1.834464192390442, "logps/rejected": -3.216090679168701, "loss": 0.8349, "nll_loss": 0.7961033582687378, "rewards/accuracies": 0.75, "rewards/chosen": -0.18344640731811523, "rewards/margins": 0.13816265761852264, "rewards/rejected": -0.3216090798377991, "step": 1713 }, { "epoch": 4.692676249144421, "grad_norm": 2.9932496547698975, "learning_rate": 7.652054794520547e-07, "log_odds_chosen": 2.3447425365448, "log_odds_ratio": -0.3210315406322479, "logits/chosen": 0.9994786977767944, "logits/rejected": 1.1042903661727905, "logps/chosen": -1.975606918334961, "logps/rejected": -4.155834674835205, "loss": 0.777, "nll_loss": 0.7449015378952026, "rewards/accuracies": 0.75, "rewards/chosen": -0.19756069779396057, "rewards/margins": 0.21802276372909546, "rewards/rejected": -0.41558346152305603, "step": 1714 }, { "epoch": 4.695414099931554, "grad_norm": 2.3979482650756836, "learning_rate": 7.650684931506849e-07, "log_odds_chosen": 1.548676609992981, "log_odds_ratio": -0.2507539391517639, "logits/chosen": 0.7194259166717529, "logits/rejected": 0.7145256996154785, "logps/chosen": -2.198500156402588, "logps/rejected": -3.648023843765259, "loss": 0.7163, "nll_loss": 0.6912131905555725, "rewards/accuracies": 0.875, "rewards/chosen": -0.21985003352165222, "rewards/margins": 0.14495235681533813, "rewards/rejected": -0.36480236053466797, "step": 1715 }, { "epoch": 4.698151950718686, "grad_norm": 1.9892303943634033, "learning_rate": 7.649315068493151e-07, "log_odds_chosen": 2.659254550933838, "log_odds_ratio": -0.14848141372203827, "logits/chosen": 0.8524017930030823, "logits/rejected": 0.7667979001998901, "logps/chosen": -1.4538779258728027, "logps/rejected": -3.8610572814941406, "loss": 0.7127, "nll_loss": 0.6978436708450317, "rewards/accuracies": 1.0, "rewards/chosen": -0.14538778364658356, "rewards/margins": 0.24071797728538513, "rewards/rejected": -0.3861057758331299, "step": 1716 }, { "epoch": 4.700889801505818, "grad_norm": 2.8391871452331543, "learning_rate": 7.647945205479452e-07, "log_odds_chosen": 0.61781907081604, "log_odds_ratio": -0.5281040668487549, "logits/chosen": 0.5014818906784058, "logits/rejected": 0.44908490777015686, "logps/chosen": -3.4610660076141357, "logps/rejected": -4.029287338256836, "loss": 0.92, "nll_loss": 0.8671947717666626, "rewards/accuracies": 0.625, "rewards/chosen": -0.3461065888404846, "rewards/margins": 0.05682211369276047, "rewards/rejected": -0.4029287099838257, "step": 1717 }, { "epoch": 4.70362765229295, "grad_norm": 2.5273399353027344, "learning_rate": 7.646575342465753e-07, "log_odds_chosen": 1.859625220298767, "log_odds_ratio": -0.21424254775047302, "logits/chosen": 0.8959857225418091, "logits/rejected": 0.8949918746948242, "logps/chosen": -2.208284854888916, "logps/rejected": -3.9395463466644287, "loss": 0.7241, "nll_loss": 0.7026281952857971, "rewards/accuracies": 1.0, "rewards/chosen": -0.22082847356796265, "rewards/margins": 0.17312614619731903, "rewards/rejected": -0.39395463466644287, "step": 1718 }, { "epoch": 4.706365503080082, "grad_norm": 3.356581449508667, "learning_rate": 7.645205479452055e-07, "log_odds_chosen": 0.7858392000198364, "log_odds_ratio": -0.7145682573318481, "logits/chosen": 0.7865284085273743, "logits/rejected": 0.7310059070587158, "logps/chosen": -2.1156673431396484, "logps/rejected": -2.7640092372894287, "loss": 0.9683, "nll_loss": 0.8968061208724976, "rewards/accuracies": 0.625, "rewards/chosen": -0.2115667164325714, "rewards/margins": 0.06483419239521027, "rewards/rejected": -0.27640092372894287, "step": 1719 }, { "epoch": 4.7091033538672145, "grad_norm": 7.43085241317749, "learning_rate": 7.643835616438356e-07, "log_odds_chosen": 0.8475263714790344, "log_odds_ratio": -0.6484062075614929, "logits/chosen": 0.8394031524658203, "logits/rejected": 0.8172135949134827, "logps/chosen": -2.266880750656128, "logps/rejected": -2.8858113288879395, "loss": 0.7471, "nll_loss": 0.682247519493103, "rewards/accuracies": 0.875, "rewards/chosen": -0.22668807208538055, "rewards/margins": 0.06189305707812309, "rewards/rejected": -0.28858113288879395, "step": 1720 }, { "epoch": 4.7118412046543465, "grad_norm": 2.6436781883239746, "learning_rate": 7.642465753424657e-07, "log_odds_chosen": 2.405449867248535, "log_odds_ratio": -0.24339018762111664, "logits/chosen": 1.0099148750305176, "logits/rejected": 1.0165385007858276, "logps/chosen": -2.4382071495056152, "logps/rejected": -4.651444911956787, "loss": 0.8088, "nll_loss": 0.784416913986206, "rewards/accuracies": 0.875, "rewards/chosen": -0.24382072687149048, "rewards/margins": 0.22132375836372375, "rewards/rejected": -0.4651445150375366, "step": 1721 }, { "epoch": 4.714579055441479, "grad_norm": 3.508089303970337, "learning_rate": 7.641095890410959e-07, "log_odds_chosen": 1.4139132499694824, "log_odds_ratio": -0.41864868998527527, "logits/chosen": 0.944419801235199, "logits/rejected": 0.9687903523445129, "logps/chosen": -3.2687904834747314, "logps/rejected": -4.5900397300720215, "loss": 0.7652, "nll_loss": 0.7233548760414124, "rewards/accuracies": 0.875, "rewards/chosen": -0.32687908411026, "rewards/margins": 0.1321249008178711, "rewards/rejected": -0.4590039849281311, "step": 1722 }, { "epoch": 4.717316906228611, "grad_norm": 2.5567007064819336, "learning_rate": 7.63972602739726e-07, "log_odds_chosen": 1.2424416542053223, "log_odds_ratio": -0.34384655952453613, "logits/chosen": 0.911442756652832, "logits/rejected": 0.9145042896270752, "logps/chosen": -2.333024501800537, "logps/rejected": -3.4924023151397705, "loss": 0.9563, "nll_loss": 0.921957790851593, "rewards/accuracies": 0.875, "rewards/chosen": -0.23330247402191162, "rewards/margins": 0.11593779921531677, "rewards/rejected": -0.349240243434906, "step": 1723 }, { "epoch": 4.720054757015743, "grad_norm": 2.3400447368621826, "learning_rate": 7.638356164383562e-07, "log_odds_chosen": 1.580213189125061, "log_odds_ratio": -0.27021273970603943, "logits/chosen": 0.8252767324447632, "logits/rejected": 0.7761781215667725, "logps/chosen": -1.900510549545288, "logps/rejected": -3.339101791381836, "loss": 0.7983, "nll_loss": 0.7713197469711304, "rewards/accuracies": 1.0, "rewards/chosen": -0.19005104899406433, "rewards/margins": 0.1438591629266739, "rewards/rejected": -0.3339102268218994, "step": 1724 }, { "epoch": 4.722792607802875, "grad_norm": 2.315945863723755, "learning_rate": 7.636986301369863e-07, "log_odds_chosen": 1.1627516746520996, "log_odds_ratio": -0.38031303882598877, "logits/chosen": 0.5770621299743652, "logits/rejected": 0.5558469891548157, "logps/chosen": -1.8946336507797241, "logps/rejected": -2.989093780517578, "loss": 0.8351, "nll_loss": 0.7970889806747437, "rewards/accuracies": 0.875, "rewards/chosen": -0.18946337699890137, "rewards/margins": 0.10944601893424988, "rewards/rejected": -0.29890939593315125, "step": 1725 }, { "epoch": 4.725530458590007, "grad_norm": 2.642078161239624, "learning_rate": 7.635616438356164e-07, "log_odds_chosen": 1.6320178508758545, "log_odds_ratio": -0.40859290957450867, "logits/chosen": 0.6681405305862427, "logits/rejected": 0.6702319383621216, "logps/chosen": -2.5481953620910645, "logps/rejected": -4.100061893463135, "loss": 0.8453, "nll_loss": 0.8044048547744751, "rewards/accuracies": 0.875, "rewards/chosen": -0.2548195421695709, "rewards/margins": 0.15518665313720703, "rewards/rejected": -0.41000622510910034, "step": 1726 }, { "epoch": 4.728268309377139, "grad_norm": 3.767721652984619, "learning_rate": 7.634246575342466e-07, "log_odds_chosen": 1.5775610208511353, "log_odds_ratio": -0.5569801926612854, "logits/chosen": 0.9937713742256165, "logits/rejected": 1.046020269393921, "logps/chosen": -2.024874687194824, "logps/rejected": -3.5111851692199707, "loss": 0.7768, "nll_loss": 0.7211055755615234, "rewards/accuracies": 0.625, "rewards/chosen": -0.2024874985218048, "rewards/margins": 0.1486310213804245, "rewards/rejected": -0.3511185050010681, "step": 1727 }, { "epoch": 4.731006160164271, "grad_norm": 2.1614413261413574, "learning_rate": 7.632876712328766e-07, "log_odds_chosen": 1.2414671182632446, "log_odds_ratio": -0.29926925897598267, "logits/chosen": 0.783842921257019, "logits/rejected": 0.752037525177002, "logps/chosen": -1.6773548126220703, "logps/rejected": -2.780184507369995, "loss": 0.775, "nll_loss": 0.7450255751609802, "rewards/accuracies": 1.0, "rewards/chosen": -0.1677354872226715, "rewards/margins": 0.11028297245502472, "rewards/rejected": -0.27801844477653503, "step": 1728 }, { "epoch": 4.733744010951403, "grad_norm": 2.254195213317871, "learning_rate": 7.631506849315068e-07, "log_odds_chosen": 2.379396915435791, "log_odds_ratio": -0.19686582684516907, "logits/chosen": 0.9958316087722778, "logits/rejected": 1.003493070602417, "logps/chosen": -1.9332369565963745, "logps/rejected": -4.172369956970215, "loss": 0.6693, "nll_loss": 0.6495850682258606, "rewards/accuracies": 1.0, "rewards/chosen": -0.19332370162010193, "rewards/margins": 0.2239132821559906, "rewards/rejected": -0.41723698377609253, "step": 1729 }, { "epoch": 4.736481861738535, "grad_norm": 2.3276968002319336, "learning_rate": 7.63013698630137e-07, "log_odds_chosen": 2.3050198554992676, "log_odds_ratio": -0.2663997709751129, "logits/chosen": 0.7123305797576904, "logits/rejected": 0.6799457669258118, "logps/chosen": -1.651162028312683, "logps/rejected": -3.8117282390594482, "loss": 0.7754, "nll_loss": 0.7487225532531738, "rewards/accuracies": 1.0, "rewards/chosen": -0.16511622071266174, "rewards/margins": 0.21605663001537323, "rewards/rejected": -0.3811728358268738, "step": 1730 }, { "epoch": 4.739219712525667, "grad_norm": 2.6885156631469727, "learning_rate": 7.62876712328767e-07, "log_odds_chosen": 1.7166802883148193, "log_odds_ratio": -0.40292251110076904, "logits/chosen": 0.923897385597229, "logits/rejected": 0.9434596300125122, "logps/chosen": -2.101180076599121, "logps/rejected": -3.7323622703552246, "loss": 0.8039, "nll_loss": 0.7635918259620667, "rewards/accuracies": 0.875, "rewards/chosen": -0.21011802554130554, "rewards/margins": 0.1631181836128235, "rewards/rejected": -0.3732362389564514, "step": 1731 }, { "epoch": 4.741957563312799, "grad_norm": 2.153825044631958, "learning_rate": 7.627397260273972e-07, "log_odds_chosen": 2.763399362564087, "log_odds_ratio": -0.21727094054222107, "logits/chosen": 0.6954401135444641, "logits/rejected": 0.673528790473938, "logps/chosen": -1.740283727645874, "logps/rejected": -4.3471550941467285, "loss": 0.82, "nll_loss": 0.7983102202415466, "rewards/accuracies": 1.0, "rewards/chosen": -0.17402836680412292, "rewards/margins": 0.2606871426105499, "rewards/rejected": -0.43471547961235046, "step": 1732 }, { "epoch": 4.744695414099931, "grad_norm": 2.8647892475128174, "learning_rate": 7.626027397260274e-07, "log_odds_chosen": 0.8629151582717896, "log_odds_ratio": -0.3977915644645691, "logits/chosen": 0.8714504837989807, "logits/rejected": 0.8211888074874878, "logps/chosen": -2.6242752075195312, "logps/rejected": -3.379019021987915, "loss": 0.8049, "nll_loss": 0.765139102935791, "rewards/accuracies": 0.875, "rewards/chosen": -0.26242750883102417, "rewards/margins": 0.07547441124916077, "rewards/rejected": -0.33790189027786255, "step": 1733 }, { "epoch": 4.747433264887063, "grad_norm": 3.4332733154296875, "learning_rate": 7.624657534246575e-07, "log_odds_chosen": 2.0852549076080322, "log_odds_ratio": -0.27475792169570923, "logits/chosen": 1.0642242431640625, "logits/rejected": 1.0996501445770264, "logps/chosen": -2.5609116554260254, "logps/rejected": -4.561419486999512, "loss": 0.7522, "nll_loss": 0.7247607707977295, "rewards/accuracies": 0.875, "rewards/chosen": -0.2560911774635315, "rewards/margins": 0.20005083084106445, "rewards/rejected": -0.45614197850227356, "step": 1734 }, { "epoch": 4.7501711156741955, "grad_norm": 3.893117904663086, "learning_rate": 7.623287671232876e-07, "log_odds_chosen": 1.3330802917480469, "log_odds_ratio": -0.5053923726081848, "logits/chosen": 0.7534478902816772, "logits/rejected": 0.7737859487533569, "logps/chosen": -2.557272434234619, "logps/rejected": -3.768355369567871, "loss": 0.8014, "nll_loss": 0.7508417367935181, "rewards/accuracies": 0.875, "rewards/chosen": -0.25572726130485535, "rewards/margins": 0.12110830843448639, "rewards/rejected": -0.37683555483818054, "step": 1735 }, { "epoch": 4.7529089664613275, "grad_norm": 2.9387319087982178, "learning_rate": 7.621917808219178e-07, "log_odds_chosen": 1.9028240442276, "log_odds_ratio": -0.27158322930336, "logits/chosen": 1.1204328536987305, "logits/rejected": 1.1407737731933594, "logps/chosen": -2.368500232696533, "logps/rejected": -4.136209964752197, "loss": 0.6934, "nll_loss": 0.6662434339523315, "rewards/accuracies": 0.875, "rewards/chosen": -0.23684999346733093, "rewards/margins": 0.17677101492881775, "rewards/rejected": -0.4136210083961487, "step": 1736 }, { "epoch": 4.75564681724846, "grad_norm": 2.3791146278381348, "learning_rate": 7.620547945205479e-07, "log_odds_chosen": 1.8712027072906494, "log_odds_ratio": -0.26979514956474304, "logits/chosen": 0.941551148891449, "logits/rejected": 0.943042516708374, "logps/chosen": -1.7326489686965942, "logps/rejected": -3.4663045406341553, "loss": 0.7331, "nll_loss": 0.7061071395874023, "rewards/accuracies": 0.75, "rewards/chosen": -0.17326489090919495, "rewards/margins": 0.1733655482530594, "rewards/rejected": -0.34663042426109314, "step": 1737 }, { "epoch": 4.758384668035592, "grad_norm": 2.6583733558654785, "learning_rate": 7.619178082191781e-07, "log_odds_chosen": 1.6154707670211792, "log_odds_ratio": -0.41872042417526245, "logits/chosen": 0.8476060032844543, "logits/rejected": 0.8120326995849609, "logps/chosen": -2.2298262119293213, "logps/rejected": -3.7876124382019043, "loss": 0.7884, "nll_loss": 0.7465096712112427, "rewards/accuracies": 0.625, "rewards/chosen": -0.22298263013362885, "rewards/margins": 0.1557786464691162, "rewards/rejected": -0.37876126170158386, "step": 1738 }, { "epoch": 4.761122518822724, "grad_norm": 4.102367401123047, "learning_rate": 7.617808219178082e-07, "log_odds_chosen": 0.9309273362159729, "log_odds_ratio": -0.4343032240867615, "logits/chosen": 0.89049232006073, "logits/rejected": 0.7953658103942871, "logps/chosen": -1.889665126800537, "logps/rejected": -2.654700756072998, "loss": 0.7856, "nll_loss": 0.7422192692756653, "rewards/accuracies": 0.875, "rewards/chosen": -0.1889665126800537, "rewards/margins": 0.07650357484817505, "rewards/rejected": -0.26547008752822876, "step": 1739 }, { "epoch": 4.763860369609857, "grad_norm": 2.2520594596862793, "learning_rate": 7.616438356164383e-07, "log_odds_chosen": 3.7992210388183594, "log_odds_ratio": -0.08863086253404617, "logits/chosen": 0.6252699494361877, "logits/rejected": 0.5881673097610474, "logps/chosen": -1.8125336170196533, "logps/rejected": -5.337120532989502, "loss": 0.8732, "nll_loss": 0.8643113374710083, "rewards/accuracies": 1.0, "rewards/chosen": -0.1812533587217331, "rewards/margins": 0.3524586856365204, "rewards/rejected": -0.5337120294570923, "step": 1740 }, { "epoch": 4.766598220396988, "grad_norm": 2.4775166511535645, "learning_rate": 7.615068493150685e-07, "log_odds_chosen": 1.5279699563980103, "log_odds_ratio": -0.2696755528450012, "logits/chosen": 0.8213899731636047, "logits/rejected": 0.7744712233543396, "logps/chosen": -1.7918040752410889, "logps/rejected": -3.1650047302246094, "loss": 0.7216, "nll_loss": 0.6945855021476746, "rewards/accuracies": 1.0, "rewards/chosen": -0.17918041348457336, "rewards/margins": 0.13732008635997772, "rewards/rejected": -0.3165004849433899, "step": 1741 }, { "epoch": 4.769336071184121, "grad_norm": 4.205533027648926, "learning_rate": 7.613698630136985e-07, "log_odds_chosen": 2.1424639225006104, "log_odds_ratio": -0.638079047203064, "logits/chosen": 1.0735862255096436, "logits/rejected": 1.0839754343032837, "logps/chosen": -3.086902141571045, "logps/rejected": -5.1476263999938965, "loss": 0.8214, "nll_loss": 0.7575552463531494, "rewards/accuracies": 0.875, "rewards/chosen": -0.30869022011756897, "rewards/margins": 0.20607244968414307, "rewards/rejected": -0.5147626399993896, "step": 1742 }, { "epoch": 4.772073921971253, "grad_norm": 3.431244134902954, "learning_rate": 7.612328767123287e-07, "log_odds_chosen": 1.2639883756637573, "log_odds_ratio": -0.48045527935028076, "logits/chosen": 0.6711678504943848, "logits/rejected": 0.6367015838623047, "logps/chosen": -2.226280450820923, "logps/rejected": -3.380117416381836, "loss": 0.7943, "nll_loss": 0.74628746509552, "rewards/accuracies": 0.75, "rewards/chosen": -0.22262805700302124, "rewards/margins": 0.11538372933864594, "rewards/rejected": -0.338011771440506, "step": 1743 }, { "epoch": 4.774811772758385, "grad_norm": 4.021474838256836, "learning_rate": 7.610958904109589e-07, "log_odds_chosen": 1.8384978771209717, "log_odds_ratio": -0.4736226201057434, "logits/chosen": 1.1256719827651978, "logits/rejected": 1.156788945198059, "logps/chosen": -2.913666248321533, "logps/rejected": -4.6717448234558105, "loss": 0.8734, "nll_loss": 0.8260184526443481, "rewards/accuracies": 0.75, "rewards/chosen": -0.2913666367530823, "rewards/margins": 0.17580780386924744, "rewards/rejected": -0.4671744406223297, "step": 1744 }, { "epoch": 4.777549623545517, "grad_norm": 2.415276050567627, "learning_rate": 7.609589041095889e-07, "log_odds_chosen": 1.9993382692337036, "log_odds_ratio": -0.22197812795639038, "logits/chosen": 0.8596608638763428, "logits/rejected": 0.8801344633102417, "logps/chosen": -2.1117215156555176, "logps/rejected": -3.99354887008667, "loss": 0.7611, "nll_loss": 0.7389025092124939, "rewards/accuracies": 1.0, "rewards/chosen": -0.21117213368415833, "rewards/margins": 0.18818271160125732, "rewards/rejected": -0.39935487508773804, "step": 1745 }, { "epoch": 4.780287474332649, "grad_norm": 3.001732110977173, "learning_rate": 7.608219178082191e-07, "log_odds_chosen": 2.09271502494812, "log_odds_ratio": -0.19801408052444458, "logits/chosen": 1.216526746749878, "logits/rejected": 1.2460134029388428, "logps/chosen": -3.6052393913269043, "logps/rejected": -5.605224132537842, "loss": 0.7408, "nll_loss": 0.7210420966148376, "rewards/accuracies": 1.0, "rewards/chosen": -0.36052393913269043, "rewards/margins": 0.19999846816062927, "rewards/rejected": -0.5605224370956421, "step": 1746 }, { "epoch": 4.783025325119781, "grad_norm": 2.261624813079834, "learning_rate": 7.606849315068493e-07, "log_odds_chosen": 2.8356587886810303, "log_odds_ratio": -0.21179041266441345, "logits/chosen": 0.8651231527328491, "logits/rejected": 0.800068736076355, "logps/chosen": -1.4500809907913208, "logps/rejected": -4.055547714233398, "loss": 0.6922, "nll_loss": 0.6709814071655273, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450081169605255, "rewards/margins": 0.26054665446281433, "rewards/rejected": -0.40555477142333984, "step": 1747 }, { "epoch": 4.785763175906913, "grad_norm": 3.297322988510132, "learning_rate": 7.605479452054794e-07, "log_odds_chosen": 2.7822415828704834, "log_odds_ratio": -0.30926498770713806, "logits/chosen": 0.8080684542655945, "logits/rejected": 0.8375011682510376, "logps/chosen": -2.299222946166992, "logps/rejected": -4.916037559509277, "loss": 0.8722, "nll_loss": 0.8412354588508606, "rewards/accuracies": 0.875, "rewards/chosen": -0.22992229461669922, "rewards/margins": 0.261681467294693, "rewards/rejected": -0.4916037619113922, "step": 1748 }, { "epoch": 4.788501026694045, "grad_norm": 2.348970651626587, "learning_rate": 7.604109589041095e-07, "log_odds_chosen": 2.7208385467529297, "log_odds_ratio": -0.19156232476234436, "logits/chosen": 0.5109153389930725, "logits/rejected": 0.4264545738697052, "logps/chosen": -1.7812083959579468, "logps/rejected": -4.358311176300049, "loss": 0.8189, "nll_loss": 0.7997846007347107, "rewards/accuracies": 1.0, "rewards/chosen": -0.17812085151672363, "rewards/margins": 0.2577102482318878, "rewards/rejected": -0.43583109974861145, "step": 1749 }, { "epoch": 4.791238877481177, "grad_norm": 2.7171268463134766, "learning_rate": 7.602739726027397e-07, "log_odds_chosen": 1.5017709732055664, "log_odds_ratio": -0.29380619525909424, "logits/chosen": 0.9140820503234863, "logits/rejected": 0.896808385848999, "logps/chosen": -1.9406965970993042, "logps/rejected": -3.2835443019866943, "loss": 0.8387, "nll_loss": 0.8092821836471558, "rewards/accuracies": 0.875, "rewards/chosen": -0.19406966865062714, "rewards/margins": 0.13428477942943573, "rewards/rejected": -0.3283544182777405, "step": 1750 }, { "epoch": 4.7939767282683095, "grad_norm": 2.522146463394165, "learning_rate": 7.601369863013698e-07, "log_odds_chosen": 1.8026063442230225, "log_odds_ratio": -0.2913946211338043, "logits/chosen": 0.8528681993484497, "logits/rejected": 0.7596777677536011, "logps/chosen": -1.688734769821167, "logps/rejected": -3.348832130432129, "loss": 0.8177, "nll_loss": 0.7885794639587402, "rewards/accuracies": 0.875, "rewards/chosen": -0.16887348890304565, "rewards/margins": 0.16600975394248962, "rewards/rejected": -0.3348832130432129, "step": 1751 }, { "epoch": 4.7967145790554415, "grad_norm": 2.3597214221954346, "learning_rate": 7.599999999999999e-07, "log_odds_chosen": 2.2932305335998535, "log_odds_ratio": -0.20433953404426575, "logits/chosen": 0.793793261051178, "logits/rejected": 0.7814761400222778, "logps/chosen": -1.8061295747756958, "logps/rejected": -3.943063259124756, "loss": 0.7673, "nll_loss": 0.7468400597572327, "rewards/accuracies": 0.875, "rewards/chosen": -0.1806129664182663, "rewards/margins": 0.21369338035583496, "rewards/rejected": -0.39430636167526245, "step": 1752 }, { "epoch": 4.799452429842574, "grad_norm": 6.135737895965576, "learning_rate": 7.598630136986301e-07, "log_odds_chosen": 1.388195276260376, "log_odds_ratio": -0.8124932050704956, "logits/chosen": 1.0496037006378174, "logits/rejected": 0.9873380661010742, "logps/chosen": -2.231752395629883, "logps/rejected": -3.414844274520874, "loss": 0.8486, "nll_loss": 0.7673550844192505, "rewards/accuracies": 0.75, "rewards/chosen": -0.2231752574443817, "rewards/margins": 0.11830919235944748, "rewards/rejected": -0.3414844274520874, "step": 1753 }, { "epoch": 4.802190280629706, "grad_norm": 5.696967124938965, "learning_rate": 7.597260273972602e-07, "log_odds_chosen": 1.5150330066680908, "log_odds_ratio": -0.437711626291275, "logits/chosen": 1.0105030536651611, "logits/rejected": 0.9275751113891602, "logps/chosen": -2.40327787399292, "logps/rejected": -3.8284199237823486, "loss": 0.7901, "nll_loss": 0.7463394403457642, "rewards/accuracies": 0.875, "rewards/chosen": -0.24032777547836304, "rewards/margins": 0.14251422882080078, "rewards/rejected": -0.3828420042991638, "step": 1754 }, { "epoch": 4.804928131416838, "grad_norm": 1.9854048490524292, "learning_rate": 7.595890410958904e-07, "log_odds_chosen": 2.483560085296631, "log_odds_ratio": -0.28546178340911865, "logits/chosen": 0.8230535387992859, "logits/rejected": 0.8111891150474548, "logps/chosen": -1.510686993598938, "logps/rejected": -3.8464643955230713, "loss": 0.7451, "nll_loss": 0.7165892124176025, "rewards/accuracies": 1.0, "rewards/chosen": -0.15106868743896484, "rewards/margins": 0.23357772827148438, "rewards/rejected": -0.384646475315094, "step": 1755 }, { "epoch": 4.80766598220397, "grad_norm": 3.6122939586639404, "learning_rate": 7.594520547945204e-07, "log_odds_chosen": 0.8599429130554199, "log_odds_ratio": -0.46601685881614685, "logits/chosen": 1.1823861598968506, "logits/rejected": 1.2251012325286865, "logps/chosen": -2.814493179321289, "logps/rejected": -3.613903045654297, "loss": 0.7337, "nll_loss": 0.6870497465133667, "rewards/accuracies": 0.625, "rewards/chosen": -0.2814493179321289, "rewards/margins": 0.07994099706411362, "rewards/rejected": -0.36139029264450073, "step": 1756 }, { "epoch": 4.810403832991102, "grad_norm": 2.8927924633026123, "learning_rate": 7.593150684931506e-07, "log_odds_chosen": 3.1038596630096436, "log_odds_ratio": -0.2402912676334381, "logits/chosen": 0.7701497673988342, "logits/rejected": 0.7093741297721863, "logps/chosen": -2.818821907043457, "logps/rejected": -5.828297138214111, "loss": 0.8071, "nll_loss": 0.7830294370651245, "rewards/accuracies": 0.875, "rewards/chosen": -0.2818821966648102, "rewards/margins": 0.3009474575519562, "rewards/rejected": -0.5828297138214111, "step": 1757 }, { "epoch": 4.813141683778234, "grad_norm": 4.383725643157959, "learning_rate": 7.591780821917808e-07, "log_odds_chosen": 0.9998471736907959, "log_odds_ratio": -0.533358097076416, "logits/chosen": 1.1268329620361328, "logits/rejected": 1.1370106935501099, "logps/chosen": -3.04903507232666, "logps/rejected": -3.998244285583496, "loss": 0.7506, "nll_loss": 0.6972827911376953, "rewards/accuracies": 0.75, "rewards/chosen": -0.304903507232666, "rewards/margins": 0.09492092579603195, "rewards/rejected": -0.39982447028160095, "step": 1758 }, { "epoch": 4.815879534565366, "grad_norm": 2.401301383972168, "learning_rate": 7.590410958904108e-07, "log_odds_chosen": 1.6929235458374023, "log_odds_ratio": -0.25216802954673767, "logits/chosen": 0.7139689326286316, "logits/rejected": 0.6561038494110107, "logps/chosen": -1.3772045373916626, "logps/rejected": -2.798926591873169, "loss": 0.7865, "nll_loss": 0.7613157033920288, "rewards/accuracies": 1.0, "rewards/chosen": -0.13772045075893402, "rewards/margins": 0.1421722024679184, "rewards/rejected": -0.2798926830291748, "step": 1759 }, { "epoch": 4.818617385352498, "grad_norm": 2.433742046356201, "learning_rate": 7.58904109589041e-07, "log_odds_chosen": 0.7146078944206238, "log_odds_ratio": -0.4904336929321289, "logits/chosen": 0.7921395897865295, "logits/rejected": 0.7913082838058472, "logps/chosen": -1.9195380210876465, "logps/rejected": -2.4953842163085938, "loss": 0.813, "nll_loss": 0.7639986276626587, "rewards/accuracies": 0.875, "rewards/chosen": -0.19195379316806793, "rewards/margins": 0.057584621012210846, "rewards/rejected": -0.24953843653202057, "step": 1760 }, { "epoch": 4.82135523613963, "grad_norm": 2.713125467300415, "learning_rate": 7.587671232876712e-07, "log_odds_chosen": 2.7477633953094482, "log_odds_ratio": -0.21285134553909302, "logits/chosen": 0.9081563949584961, "logits/rejected": 0.9439021348953247, "logps/chosen": -1.9740519523620605, "logps/rejected": -4.525722980499268, "loss": 0.7055, "nll_loss": 0.6841714978218079, "rewards/accuracies": 1.0, "rewards/chosen": -0.19740518927574158, "rewards/margins": 0.2551671266555786, "rewards/rejected": -0.4525723457336426, "step": 1761 }, { "epoch": 4.824093086926762, "grad_norm": 2.4878344535827637, "learning_rate": 7.586301369863013e-07, "log_odds_chosen": 1.5689698457717896, "log_odds_ratio": -0.33539754152297974, "logits/chosen": 0.49696993827819824, "logits/rejected": 0.4607617259025574, "logps/chosen": -1.7286427021026611, "logps/rejected": -3.0642786026000977, "loss": 0.7903, "nll_loss": 0.7567368745803833, "rewards/accuracies": 0.875, "rewards/chosen": -0.17286425828933716, "rewards/margins": 0.13356360793113708, "rewards/rejected": -0.30642786622047424, "step": 1762 }, { "epoch": 4.826830937713894, "grad_norm": 3.0673372745513916, "learning_rate": 7.584931506849314e-07, "log_odds_chosen": 1.837735652923584, "log_odds_ratio": -0.34673386812210083, "logits/chosen": 0.6332228779792786, "logits/rejected": 0.6210930347442627, "logps/chosen": -2.2869575023651123, "logps/rejected": -4.02035665512085, "loss": 0.7994, "nll_loss": 0.7646797299385071, "rewards/accuracies": 0.75, "rewards/chosen": -0.22869575023651123, "rewards/margins": 0.17333991825580597, "rewards/rejected": -0.4020356833934784, "step": 1763 }, { "epoch": 4.829568788501026, "grad_norm": 2.306345224380493, "learning_rate": 7.583561643835616e-07, "log_odds_chosen": 1.8112494945526123, "log_odds_ratio": -0.3860761523246765, "logits/chosen": 0.7601466774940491, "logits/rejected": 0.6928272247314453, "logps/chosen": -2.4008431434631348, "logps/rejected": -4.132913589477539, "loss": 0.8783, "nll_loss": 0.8396982550621033, "rewards/accuracies": 0.875, "rewards/chosen": -0.24008433520793915, "rewards/margins": 0.17320701479911804, "rewards/rejected": -0.413291335105896, "step": 1764 }, { "epoch": 4.832306639288159, "grad_norm": 5.047567367553711, "learning_rate": 7.582191780821917e-07, "log_odds_chosen": 0.8927398920059204, "log_odds_ratio": -0.6746443510055542, "logits/chosen": 0.7975343465805054, "logits/rejected": 0.8047791719436646, "logps/chosen": -2.660799503326416, "logps/rejected": -3.479614019393921, "loss": 0.8694, "nll_loss": 0.8019627928733826, "rewards/accuracies": 0.625, "rewards/chosen": -0.26607996225357056, "rewards/margins": 0.08188146352767944, "rewards/rejected": -0.34796142578125, "step": 1765 }, { "epoch": 4.8350444900752905, "grad_norm": 3.7893729209899902, "learning_rate": 7.580821917808218e-07, "log_odds_chosen": 2.042274236679077, "log_odds_ratio": -0.6223525404930115, "logits/chosen": 0.7839375734329224, "logits/rejected": 0.7327605485916138, "logps/chosen": -2.7151050567626953, "logps/rejected": -4.703472137451172, "loss": 0.949, "nll_loss": 0.8867761492729187, "rewards/accuracies": 0.75, "rewards/chosen": -0.2715104818344116, "rewards/margins": 0.19883671402931213, "rewards/rejected": -0.47034719586372375, "step": 1766 }, { "epoch": 4.837782340862423, "grad_norm": 2.2071354389190674, "learning_rate": 7.57945205479452e-07, "log_odds_chosen": 1.6596713066101074, "log_odds_ratio": -0.37764355540275574, "logits/chosen": 0.6732905507087708, "logits/rejected": 0.6703492999076843, "logps/chosen": -1.9487147331237793, "logps/rejected": -3.516599655151367, "loss": 0.803, "nll_loss": 0.7652684450149536, "rewards/accuracies": 0.875, "rewards/chosen": -0.19487148523330688, "rewards/margins": 0.15678851306438446, "rewards/rejected": -0.35165998339653015, "step": 1767 }, { "epoch": 4.840520191649555, "grad_norm": 2.3498504161834717, "learning_rate": 7.578082191780821e-07, "log_odds_chosen": 2.375091552734375, "log_odds_ratio": -0.23529495298862457, "logits/chosen": 0.995881199836731, "logits/rejected": 0.9803009033203125, "logps/chosen": -2.066650867462158, "logps/rejected": -4.264927864074707, "loss": 0.7832, "nll_loss": 0.7596715688705444, "rewards/accuracies": 1.0, "rewards/chosen": -0.20666509866714478, "rewards/margins": 0.21982768177986145, "rewards/rejected": -0.4264927804470062, "step": 1768 }, { "epoch": 4.843258042436688, "grad_norm": 2.551496744155884, "learning_rate": 7.576712328767123e-07, "log_odds_chosen": 0.4382975101470947, "log_odds_ratio": -0.5405611991882324, "logits/chosen": 0.7445789575576782, "logits/rejected": 0.722349226474762, "logps/chosen": -1.8348631858825684, "logps/rejected": -2.232698678970337, "loss": 0.8253, "nll_loss": 0.7712091207504272, "rewards/accuracies": 0.75, "rewards/chosen": -0.18348631262779236, "rewards/margins": 0.039783552289009094, "rewards/rejected": -0.22326987981796265, "step": 1769 }, { "epoch": 4.84599589322382, "grad_norm": 2.6344521045684814, "learning_rate": 7.575342465753424e-07, "log_odds_chosen": 1.0835858583450317, "log_odds_ratio": -0.5886367559432983, "logits/chosen": 0.7683640718460083, "logits/rejected": 0.7345665693283081, "logps/chosen": -1.729936122894287, "logps/rejected": -2.777873992919922, "loss": 0.8099, "nll_loss": 0.7510761022567749, "rewards/accuracies": 0.625, "rewards/chosen": -0.17299361526966095, "rewards/margins": 0.10479377955198288, "rewards/rejected": -0.27778738737106323, "step": 1770 }, { "epoch": 4.848733744010952, "grad_norm": 2.190507650375366, "learning_rate": 7.573972602739725e-07, "log_odds_chosen": 1.8704707622528076, "log_odds_ratio": -0.2838422954082489, "logits/chosen": 0.7884789705276489, "logits/rejected": 0.7496339082717896, "logps/chosen": -1.505396842956543, "logps/rejected": -3.223867893218994, "loss": 0.7291, "nll_loss": 0.7007429599761963, "rewards/accuracies": 1.0, "rewards/chosen": -0.15053968131542206, "rewards/margins": 0.1718471199274063, "rewards/rejected": -0.32238680124282837, "step": 1771 }, { "epoch": 4.851471594798084, "grad_norm": 2.6784329414367676, "learning_rate": 7.572602739726028e-07, "log_odds_chosen": 1.4618616104125977, "log_odds_ratio": -0.3387053906917572, "logits/chosen": 0.5872212648391724, "logits/rejected": 0.5243042707443237, "logps/chosen": -2.1664109230041504, "logps/rejected": -3.4722936153411865, "loss": 0.8441, "nll_loss": 0.8102499842643738, "rewards/accuracies": 1.0, "rewards/chosen": -0.21664109826087952, "rewards/margins": 0.13058827817440033, "rewards/rejected": -0.34722936153411865, "step": 1772 }, { "epoch": 4.854209445585216, "grad_norm": 3.0159928798675537, "learning_rate": 7.571232876712327e-07, "log_odds_chosen": 2.1958813667297363, "log_odds_ratio": -0.30524325370788574, "logits/chosen": 0.9151358604431152, "logits/rejected": 0.8870844841003418, "logps/chosen": -1.7332041263580322, "logps/rejected": -3.7646541595458984, "loss": 0.7438, "nll_loss": 0.7132772207260132, "rewards/accuracies": 1.0, "rewards/chosen": -0.17332041263580322, "rewards/margins": 0.20314498245716095, "rewards/rejected": -0.37646540999412537, "step": 1773 }, { "epoch": 4.856947296372348, "grad_norm": 2.3176369667053223, "learning_rate": 7.56986301369863e-07, "log_odds_chosen": 2.531172752380371, "log_odds_ratio": -0.1601230800151825, "logits/chosen": 1.3138705492019653, "logits/rejected": 1.3276065587997437, "logps/chosen": -1.9054393768310547, "logps/rejected": -4.2702836990356445, "loss": 0.6243, "nll_loss": 0.6082731485366821, "rewards/accuracies": 1.0, "rewards/chosen": -0.19054393470287323, "rewards/margins": 0.23648439347743988, "rewards/rejected": -0.4270283579826355, "step": 1774 }, { "epoch": 4.85968514715948, "grad_norm": 2.5646896362304688, "learning_rate": 7.568493150684932e-07, "log_odds_chosen": 2.514951705932617, "log_odds_ratio": -0.38593557476997375, "logits/chosen": 0.997018039226532, "logits/rejected": 0.9833144545555115, "logps/chosen": -2.0728864669799805, "logps/rejected": -4.519527912139893, "loss": 0.828, "nll_loss": 0.7894365787506104, "rewards/accuracies": 0.875, "rewards/chosen": -0.20728865265846252, "rewards/margins": 0.24466416239738464, "rewards/rejected": -0.45195281505584717, "step": 1775 }, { "epoch": 4.862422997946612, "grad_norm": 2.7824230194091797, "learning_rate": 7.567123287671233e-07, "log_odds_chosen": 1.3390281200408936, "log_odds_ratio": -0.32928475737571716, "logits/chosen": 0.889836311340332, "logits/rejected": 0.9059881567955017, "logps/chosen": -2.3005752563476562, "logps/rejected": -3.5571205615997314, "loss": 0.8505, "nll_loss": 0.8175299763679504, "rewards/accuracies": 1.0, "rewards/chosen": -0.2300575226545334, "rewards/margins": 0.12565450370311737, "rewards/rejected": -0.35571205615997314, "step": 1776 }, { "epoch": 4.865160848733744, "grad_norm": 2.5718767642974854, "learning_rate": 7.565753424657534e-07, "log_odds_chosen": 2.4776480197906494, "log_odds_ratio": -0.2237367331981659, "logits/chosen": 1.1963438987731934, "logits/rejected": 1.2446397542953491, "logps/chosen": -2.1366653442382812, "logps/rejected": -4.458526611328125, "loss": 0.6585, "nll_loss": 0.6361672878265381, "rewards/accuracies": 0.875, "rewards/chosen": -0.21366655826568604, "rewards/margins": 0.23218609392642975, "rewards/rejected": -0.445852667093277, "step": 1777 }, { "epoch": 4.867898699520876, "grad_norm": 2.642385721206665, "learning_rate": 7.564383561643836e-07, "log_odds_chosen": 2.459547996520996, "log_odds_ratio": -0.28875505924224854, "logits/chosen": 1.0264922380447388, "logits/rejected": 1.0102994441986084, "logps/chosen": -2.5711357593536377, "logps/rejected": -4.88616943359375, "loss": 0.723, "nll_loss": 0.6941494345664978, "rewards/accuracies": 0.875, "rewards/chosen": -0.25711357593536377, "rewards/margins": 0.23150338232517242, "rewards/rejected": -0.488616943359375, "step": 1778 }, { "epoch": 4.870636550308008, "grad_norm": 2.6477131843566895, "learning_rate": 7.563013698630137e-07, "log_odds_chosen": 1.2068250179290771, "log_odds_ratio": -0.42138856649398804, "logits/chosen": 0.554079532623291, "logits/rejected": 0.4473361372947693, "logps/chosen": -1.8477846384048462, "logps/rejected": -2.971865177154541, "loss": 0.874, "nll_loss": 0.8318614959716797, "rewards/accuracies": 1.0, "rewards/chosen": -0.18477846682071686, "rewards/margins": 0.11240803450345993, "rewards/rejected": -0.2971864938735962, "step": 1779 }, { "epoch": 4.87337440109514, "grad_norm": 3.948132276535034, "learning_rate": 7.561643835616438e-07, "log_odds_chosen": 1.322762131690979, "log_odds_ratio": -0.570652961730957, "logits/chosen": 0.7559534907341003, "logits/rejected": 0.7478915452957153, "logps/chosen": -2.919065475463867, "logps/rejected": -4.160017013549805, "loss": 0.8681, "nll_loss": 0.8110671043395996, "rewards/accuracies": 0.625, "rewards/chosen": -0.29190656542778015, "rewards/margins": 0.12409515678882599, "rewards/rejected": -0.41600170731544495, "step": 1780 }, { "epoch": 4.876112251882272, "grad_norm": 2.783482551574707, "learning_rate": 7.56027397260274e-07, "log_odds_chosen": 1.7702000141143799, "log_odds_ratio": -0.3425821363925934, "logits/chosen": 0.7221035361289978, "logits/rejected": 0.649512529373169, "logps/chosen": -1.3726329803466797, "logps/rejected": -2.9577372074127197, "loss": 0.7912, "nll_loss": 0.7569095492362976, "rewards/accuracies": 1.0, "rewards/chosen": -0.13726331293582916, "rewards/margins": 0.15851040184497833, "rewards/rejected": -0.2957737445831299, "step": 1781 }, { "epoch": 4.878850102669404, "grad_norm": 2.544644355773926, "learning_rate": 7.558904109589041e-07, "log_odds_chosen": 1.229175329208374, "log_odds_ratio": -0.32936960458755493, "logits/chosen": 0.9483451247215271, "logits/rejected": 0.9268661737442017, "logps/chosen": -2.1454806327819824, "logps/rejected": -3.3117473125457764, "loss": 0.7538, "nll_loss": 0.7208684086799622, "rewards/accuracies": 1.0, "rewards/chosen": -0.21454806625843048, "rewards/margins": 0.11662669479846954, "rewards/rejected": -0.33117473125457764, "step": 1782 }, { "epoch": 4.8815879534565365, "grad_norm": 3.1196486949920654, "learning_rate": 7.557534246575343e-07, "log_odds_chosen": 2.0791890621185303, "log_odds_ratio": -0.3651757538318634, "logits/chosen": 1.1514549255371094, "logits/rejected": 1.1655399799346924, "logps/chosen": -2.2666690349578857, "logps/rejected": -4.185173034667969, "loss": 0.6652, "nll_loss": 0.6286803483963013, "rewards/accuracies": 0.875, "rewards/chosen": -0.2266668975353241, "rewards/margins": 0.1918504238128662, "rewards/rejected": -0.4185172915458679, "step": 1783 }, { "epoch": 4.884325804243669, "grad_norm": 2.7110068798065186, "learning_rate": 7.556164383561644e-07, "log_odds_chosen": 0.8112479448318481, "log_odds_ratio": -0.40145790576934814, "logits/chosen": 0.9929031133651733, "logits/rejected": 1.0012574195861816, "logps/chosen": -2.4048357009887695, "logps/rejected": -3.1380414962768555, "loss": 0.8042, "nll_loss": 0.7640892863273621, "rewards/accuracies": 1.0, "rewards/chosen": -0.24048356711864471, "rewards/margins": 0.07332056760787964, "rewards/rejected": -0.31380411982536316, "step": 1784 }, { "epoch": 4.887063655030801, "grad_norm": 2.784672737121582, "learning_rate": 7.554794520547945e-07, "log_odds_chosen": 2.5068953037261963, "log_odds_ratio": -0.19704163074493408, "logits/chosen": 0.7049175500869751, "logits/rejected": 0.7016793489456177, "logps/chosen": -2.215709686279297, "logps/rejected": -4.610560894012451, "loss": 0.8436, "nll_loss": 0.8238555192947388, "rewards/accuracies": 1.0, "rewards/chosen": -0.2215709686279297, "rewards/margins": 0.23948512971401215, "rewards/rejected": -0.461056113243103, "step": 1785 }, { "epoch": 4.889801505817933, "grad_norm": 2.207463026046753, "learning_rate": 7.553424657534247e-07, "log_odds_chosen": 3.345473289489746, "log_odds_ratio": -0.07577431201934814, "logits/chosen": 0.966067910194397, "logits/rejected": 1.0043326616287231, "logps/chosen": -2.3523752689361572, "logps/rejected": -5.457769870758057, "loss": 0.6429, "nll_loss": 0.6353633999824524, "rewards/accuracies": 1.0, "rewards/chosen": -0.23523752391338348, "rewards/margins": 0.31053948402404785, "rewards/rejected": -0.5457769632339478, "step": 1786 }, { "epoch": 4.892539356605065, "grad_norm": 3.6882941722869873, "learning_rate": 7.552054794520547e-07, "log_odds_chosen": 1.9121731519699097, "log_odds_ratio": -0.5055842399597168, "logits/chosen": 0.8120768666267395, "logits/rejected": 0.8602741956710815, "logps/chosen": -2.4804270267486572, "logps/rejected": -4.2994818687438965, "loss": 0.9508, "nll_loss": 0.9002020359039307, "rewards/accuracies": 0.75, "rewards/chosen": -0.24804270267486572, "rewards/margins": 0.18190550804138184, "rewards/rejected": -0.42994821071624756, "step": 1787 }, { "epoch": 4.895277207392197, "grad_norm": 3.3055484294891357, "learning_rate": 7.550684931506849e-07, "log_odds_chosen": 1.4381593465805054, "log_odds_ratio": -0.4615083336830139, "logits/chosen": 1.0424035787582397, "logits/rejected": 1.0335229635238647, "logps/chosen": -2.644618511199951, "logps/rejected": -4.005832672119141, "loss": 0.7679, "nll_loss": 0.721787691116333, "rewards/accuracies": 0.875, "rewards/chosen": -0.26446184515953064, "rewards/margins": 0.13612139225006104, "rewards/rejected": -0.4005832374095917, "step": 1788 }, { "epoch": 4.898015058179329, "grad_norm": 3.7683632373809814, "learning_rate": 7.549315068493151e-07, "log_odds_chosen": 2.5369713306427, "log_odds_ratio": -0.21161845326423645, "logits/chosen": 0.759647786617279, "logits/rejected": 0.6847569942474365, "logps/chosen": -1.7703704833984375, "logps/rejected": -4.1058669090271, "loss": 0.7701, "nll_loss": 0.7489681839942932, "rewards/accuracies": 0.875, "rewards/chosen": -0.17703703045845032, "rewards/margins": 0.23354965448379517, "rewards/rejected": -0.4105866849422455, "step": 1789 }, { "epoch": 4.900752908966461, "grad_norm": 2.7614591121673584, "learning_rate": 7.547945205479452e-07, "log_odds_chosen": 2.6792831420898438, "log_odds_ratio": -0.24407373368740082, "logits/chosen": 1.1781351566314697, "logits/rejected": 1.1698939800262451, "logps/chosen": -1.822492241859436, "logps/rejected": -4.29429292678833, "loss": 0.6479, "nll_loss": 0.6234756708145142, "rewards/accuracies": 1.0, "rewards/chosen": -0.18224923312664032, "rewards/margins": 0.24718007445335388, "rewards/rejected": -0.4294293224811554, "step": 1790 }, { "epoch": 4.903490759753593, "grad_norm": 2.889284610748291, "learning_rate": 7.546575342465753e-07, "log_odds_chosen": 1.4972920417785645, "log_odds_ratio": -0.2923019826412201, "logits/chosen": 0.5695847868919373, "logits/rejected": 0.46068552136421204, "logps/chosen": -2.3176095485687256, "logps/rejected": -3.734872817993164, "loss": 0.8498, "nll_loss": 0.8205921649932861, "rewards/accuracies": 1.0, "rewards/chosen": -0.23176097869873047, "rewards/margins": 0.1417263001203537, "rewards/rejected": -0.37348729372024536, "step": 1791 }, { "epoch": 4.906228610540726, "grad_norm": 2.3336164951324463, "learning_rate": 7.545205479452055e-07, "log_odds_chosen": 2.0676345825195312, "log_odds_ratio": -0.23390145599842072, "logits/chosen": 0.7042721509933472, "logits/rejected": 0.7114455699920654, "logps/chosen": -1.5921545028686523, "logps/rejected": -3.4222097396850586, "loss": 0.7664, "nll_loss": 0.743050217628479, "rewards/accuracies": 1.0, "rewards/chosen": -0.15921545028686523, "rewards/margins": 0.18300551176071167, "rewards/rejected": -0.3422209620475769, "step": 1792 }, { "epoch": 4.908966461327857, "grad_norm": 2.4039971828460693, "learning_rate": 7.543835616438356e-07, "log_odds_chosen": 3.3603546619415283, "log_odds_ratio": -0.16398227214813232, "logits/chosen": 0.7444879412651062, "logits/rejected": 0.6714684963226318, "logps/chosen": -2.077176094055176, "logps/rejected": -5.28456449508667, "loss": 0.8614, "nll_loss": 0.8450140953063965, "rewards/accuracies": 1.0, "rewards/chosen": -0.207717627286911, "rewards/margins": 0.32073885202407837, "rewards/rejected": -0.528456449508667, "step": 1793 }, { "epoch": 4.91170431211499, "grad_norm": 2.035205841064453, "learning_rate": 7.542465753424657e-07, "log_odds_chosen": 2.641185760498047, "log_odds_ratio": -0.2539379298686981, "logits/chosen": 0.8058138489723206, "logits/rejected": 0.7524920105934143, "logps/chosen": -2.3195061683654785, "logps/rejected": -4.8567705154418945, "loss": 0.7649, "nll_loss": 0.7394683361053467, "rewards/accuracies": 0.875, "rewards/chosen": -0.23195061087608337, "rewards/margins": 0.25372639298439026, "rewards/rejected": -0.48567700386047363, "step": 1794 }, { "epoch": 4.914442162902122, "grad_norm": 2.205874443054199, "learning_rate": 7.541095890410959e-07, "log_odds_chosen": 2.22888445854187, "log_odds_ratio": -0.268018513917923, "logits/chosen": 0.823434591293335, "logits/rejected": 0.7915260791778564, "logps/chosen": -2.792034387588501, "logps/rejected": -4.892809867858887, "loss": 0.7695, "nll_loss": 0.7427437901496887, "rewards/accuracies": 0.75, "rewards/chosen": -0.2792034149169922, "rewards/margins": 0.21007755398750305, "rewards/rejected": -0.48928096890449524, "step": 1795 }, { "epoch": 4.917180013689254, "grad_norm": 2.314364433288574, "learning_rate": 7.53972602739726e-07, "log_odds_chosen": 1.4816185235977173, "log_odds_ratio": -0.3101029396057129, "logits/chosen": 0.7414416074752808, "logits/rejected": 0.6846418976783752, "logps/chosen": -1.8969602584838867, "logps/rejected": -3.2575833797454834, "loss": 0.7724, "nll_loss": 0.7414047718048096, "rewards/accuracies": 1.0, "rewards/chosen": -0.1896960437297821, "rewards/margins": 0.13606229424476624, "rewards/rejected": -0.32575833797454834, "step": 1796 }, { "epoch": 4.919917864476386, "grad_norm": 2.648691177368164, "learning_rate": 7.538356164383562e-07, "log_odds_chosen": 1.7618608474731445, "log_odds_ratio": -0.27896085381507874, "logits/chosen": 0.8961355686187744, "logits/rejected": 0.9330575466156006, "logps/chosen": -2.4130001068115234, "logps/rejected": -4.041540145874023, "loss": 0.7596, "nll_loss": 0.7317005395889282, "rewards/accuracies": 1.0, "rewards/chosen": -0.24130001664161682, "rewards/margins": 0.16285400092601776, "rewards/rejected": -0.4041540026664734, "step": 1797 }, { "epoch": 4.922655715263518, "grad_norm": 2.4786038398742676, "learning_rate": 7.536986301369863e-07, "log_odds_chosen": 2.4204421043395996, "log_odds_ratio": -0.22553183138370514, "logits/chosen": 0.5099807977676392, "logits/rejected": 0.47961941361427307, "logps/chosen": -1.9383978843688965, "logps/rejected": -4.194491386413574, "loss": 0.8033, "nll_loss": 0.7807235717773438, "rewards/accuracies": 1.0, "rewards/chosen": -0.19383980333805084, "rewards/margins": 0.22560936212539673, "rewards/rejected": -0.4194491505622864, "step": 1798 }, { "epoch": 4.9253935660506505, "grad_norm": 3.7148678302764893, "learning_rate": 7.535616438356164e-07, "log_odds_chosen": 1.9333670139312744, "log_odds_ratio": -0.6666110754013062, "logits/chosen": 1.1582930088043213, "logits/rejected": 1.184302806854248, "logps/chosen": -3.0517194271087646, "logps/rejected": -5.004375457763672, "loss": 0.7955, "nll_loss": 0.7288236021995544, "rewards/accuracies": 0.625, "rewards/chosen": -0.305171936750412, "rewards/margins": 0.19526562094688416, "rewards/rejected": -0.5004375576972961, "step": 1799 }, { "epoch": 4.9281314168377826, "grad_norm": 2.280473470687866, "learning_rate": 7.534246575342466e-07, "log_odds_chosen": 3.0280585289001465, "log_odds_ratio": -0.16133981943130493, "logits/chosen": 1.0341554880142212, "logits/rejected": 1.0324883460998535, "logps/chosen": -2.376558780670166, "logps/rejected": -5.298233509063721, "loss": 0.7376, "nll_loss": 0.7214570045471191, "rewards/accuracies": 1.0, "rewards/chosen": -0.2376558929681778, "rewards/margins": 0.2921674847602844, "rewards/rejected": -0.529823362827301, "step": 1800 }, { "epoch": 4.930869267624915, "grad_norm": 3.511418581008911, "learning_rate": 7.532876712328767e-07, "log_odds_chosen": 1.9075950384140015, "log_odds_ratio": -0.39021873474121094, "logits/chosen": 0.8790562152862549, "logits/rejected": 0.9223665595054626, "logps/chosen": -2.4228668212890625, "logps/rejected": -4.170106887817383, "loss": 0.6833, "nll_loss": 0.6442571878433228, "rewards/accuracies": 0.875, "rewards/chosen": -0.24228668212890625, "rewards/margins": 0.17472398281097412, "rewards/rejected": -0.41701066493988037, "step": 1801 }, { "epoch": 4.933607118412047, "grad_norm": 2.5668303966522217, "learning_rate": 7.531506849315068e-07, "log_odds_chosen": 3.7755863666534424, "log_odds_ratio": -0.05844718962907791, "logits/chosen": 0.7158101797103882, "logits/rejected": 0.6915575861930847, "logps/chosen": -1.9802807569503784, "logps/rejected": -5.503570079803467, "loss": 0.7215, "nll_loss": 0.7156215906143188, "rewards/accuracies": 1.0, "rewards/chosen": -0.1980280727148056, "rewards/margins": 0.35232892632484436, "rewards/rejected": -0.5503569841384888, "step": 1802 }, { "epoch": 4.936344969199179, "grad_norm": 2.165677785873413, "learning_rate": 7.53013698630137e-07, "log_odds_chosen": 2.080148696899414, "log_odds_ratio": -0.2737745940685272, "logits/chosen": 0.6260384321212769, "logits/rejected": 0.5748181343078613, "logps/chosen": -2.394888401031494, "logps/rejected": -4.402735710144043, "loss": 0.8217, "nll_loss": 0.7943239212036133, "rewards/accuracies": 0.875, "rewards/chosen": -0.23948884010314941, "rewards/margins": 0.20078474283218384, "rewards/rejected": -0.44027361273765564, "step": 1803 }, { "epoch": 4.939082819986311, "grad_norm": 2.292856454849243, "learning_rate": 7.528767123287671e-07, "log_odds_chosen": 1.7432589530944824, "log_odds_ratio": -0.2655869126319885, "logits/chosen": 0.9828908443450928, "logits/rejected": 0.9796454906463623, "logps/chosen": -1.7776391506195068, "logps/rejected": -3.387346029281616, "loss": 0.6723, "nll_loss": 0.6457198858261108, "rewards/accuracies": 0.875, "rewards/chosen": -0.1777639091014862, "rewards/margins": 0.16097070276737213, "rewards/rejected": -0.33873462677001953, "step": 1804 }, { "epoch": 4.941820670773443, "grad_norm": 4.226170063018799, "learning_rate": 7.527397260273972e-07, "log_odds_chosen": 1.0285484790802002, "log_odds_ratio": -0.43475332856178284, "logits/chosen": 1.1089723110198975, "logits/rejected": 1.1341758966445923, "logps/chosen": -2.4941046237945557, "logps/rejected": -3.3771417140960693, "loss": 0.7737, "nll_loss": 0.7302629947662354, "rewards/accuracies": 0.75, "rewards/chosen": -0.249410480260849, "rewards/margins": 0.08830370754003525, "rewards/rejected": -0.33771416544914246, "step": 1805 }, { "epoch": 4.944558521560575, "grad_norm": 3.0704476833343506, "learning_rate": 7.526027397260274e-07, "log_odds_chosen": 0.7480247020721436, "log_odds_ratio": -0.5408514142036438, "logits/chosen": 0.7725553512573242, "logits/rejected": 0.7533248066902161, "logps/chosen": -2.3799374103546143, "logps/rejected": -2.9663634300231934, "loss": 0.7285, "nll_loss": 0.6744205355644226, "rewards/accuracies": 0.75, "rewards/chosen": -0.2379937469959259, "rewards/margins": 0.05864258110523224, "rewards/rejected": -0.29663634300231934, "step": 1806 }, { "epoch": 4.947296372347707, "grad_norm": 2.1749255657196045, "learning_rate": 7.524657534246575e-07, "log_odds_chosen": 2.1383273601531982, "log_odds_ratio": -0.17110012471675873, "logits/chosen": 0.8427618741989136, "logits/rejected": 0.6892510652542114, "logps/chosen": -1.5225955247879028, "logps/rejected": -3.462139129638672, "loss": 0.7321, "nll_loss": 0.7149895429611206, "rewards/accuracies": 1.0, "rewards/chosen": -0.15225954353809357, "rewards/margins": 0.19395437836647034, "rewards/rejected": -0.3462139070034027, "step": 1807 }, { "epoch": 4.950034223134839, "grad_norm": 2.448659658432007, "learning_rate": 7.523287671232876e-07, "log_odds_chosen": 2.387890577316284, "log_odds_ratio": -0.25381574034690857, "logits/chosen": 0.7839125394821167, "logits/rejected": 0.7353971004486084, "logps/chosen": -1.9638725519180298, "logps/rejected": -4.201850414276123, "loss": 0.7444, "nll_loss": 0.719049870967865, "rewards/accuracies": 1.0, "rewards/chosen": -0.19638726115226746, "rewards/margins": 0.22379782795906067, "rewards/rejected": -0.4201850891113281, "step": 1808 }, { "epoch": 4.952772073921971, "grad_norm": 2.5317673683166504, "learning_rate": 7.521917808219178e-07, "log_odds_chosen": 1.8408660888671875, "log_odds_ratio": -0.242934450507164, "logits/chosen": 0.7469488382339478, "logits/rejected": 0.7467765808105469, "logps/chosen": -1.665558099746704, "logps/rejected": -3.3640480041503906, "loss": 0.7387, "nll_loss": 0.7143670320510864, "rewards/accuracies": 1.0, "rewards/chosen": -0.16655580699443817, "rewards/margins": 0.1698489785194397, "rewards/rejected": -0.33640480041503906, "step": 1809 }, { "epoch": 4.955509924709103, "grad_norm": 2.232344388961792, "learning_rate": 7.520547945205479e-07, "log_odds_chosen": 3.238032102584839, "log_odds_ratio": -0.15598884224891663, "logits/chosen": 0.8621854782104492, "logits/rejected": 0.8418533802032471, "logps/chosen": -2.026045322418213, "logps/rejected": -5.079188346862793, "loss": 0.8728, "nll_loss": 0.8571681976318359, "rewards/accuracies": 1.0, "rewards/chosen": -0.2026045173406601, "rewards/margins": 0.305314302444458, "rewards/rejected": -0.5079188346862793, "step": 1810 }, { "epoch": 4.958247775496235, "grad_norm": 2.7123377323150635, "learning_rate": 7.519178082191781e-07, "log_odds_chosen": 2.6962718963623047, "log_odds_ratio": -0.19276610016822815, "logits/chosen": 0.6797864437103271, "logits/rejected": 0.6288423538208008, "logps/chosen": -1.5463131666183472, "logps/rejected": -4.003382682800293, "loss": 0.6977, "nll_loss": 0.6784513592720032, "rewards/accuracies": 1.0, "rewards/chosen": -0.15463131666183472, "rewards/margins": 0.2457069605588913, "rewards/rejected": -0.4003382921218872, "step": 1811 }, { "epoch": 4.960985626283367, "grad_norm": 3.6843113899230957, "learning_rate": 7.517808219178082e-07, "log_odds_chosen": 2.1985812187194824, "log_odds_ratio": -0.28148677945137024, "logits/chosen": 0.6334854364395142, "logits/rejected": 0.5406094789505005, "logps/chosen": -1.9218870401382446, "logps/rejected": -3.95101261138916, "loss": 0.801, "nll_loss": 0.7728564143180847, "rewards/accuracies": 0.875, "rewards/chosen": -0.19218870997428894, "rewards/margins": 0.20291253924369812, "rewards/rejected": -0.39510124921798706, "step": 1812 }, { "epoch": 4.963723477070499, "grad_norm": 3.161039113998413, "learning_rate": 7.516438356164383e-07, "log_odds_chosen": 0.16474154591560364, "log_odds_ratio": -0.7165186405181885, "logits/chosen": 0.6101593971252441, "logits/rejected": 0.588771641254425, "logps/chosen": -2.0837268829345703, "logps/rejected": -2.180952787399292, "loss": 0.8697, "nll_loss": 0.798004686832428, "rewards/accuracies": 0.75, "rewards/chosen": -0.20837269723415375, "rewards/margins": 0.009722590446472168, "rewards/rejected": -0.21809527277946472, "step": 1813 }, { "epoch": 4.9664613278576315, "grad_norm": 3.704805374145508, "learning_rate": 7.515068493150685e-07, "log_odds_chosen": 1.3485698699951172, "log_odds_ratio": -0.6298385858535767, "logits/chosen": 1.0050417184829712, "logits/rejected": 1.046627163887024, "logps/chosen": -2.738618850708008, "logps/rejected": -4.075314998626709, "loss": 0.8094, "nll_loss": 0.7464449405670166, "rewards/accuracies": 0.5, "rewards/chosen": -0.2738618850708008, "rewards/margins": 0.1336696445941925, "rewards/rejected": -0.4075314998626709, "step": 1814 }, { "epoch": 4.969199178644764, "grad_norm": 2.151740550994873, "learning_rate": 7.513698630136986e-07, "log_odds_chosen": 1.7040374279022217, "log_odds_ratio": -0.35633501410484314, "logits/chosen": 0.6193218231201172, "logits/rejected": 0.5402538180351257, "logps/chosen": -1.7821323871612549, "logps/rejected": -3.3929691314697266, "loss": 0.7658, "nll_loss": 0.7301838397979736, "rewards/accuracies": 0.75, "rewards/chosen": -0.1782132238149643, "rewards/margins": 0.1610836684703827, "rewards/rejected": -0.3392969071865082, "step": 1815 }, { "epoch": 4.971937029431896, "grad_norm": 2.240039348602295, "learning_rate": 7.512328767123287e-07, "log_odds_chosen": 1.4378585815429688, "log_odds_ratio": -0.4041862189769745, "logits/chosen": 0.4950460195541382, "logits/rejected": 0.4541271924972534, "logps/chosen": -1.8321795463562012, "logps/rejected": -3.162262439727783, "loss": 0.8097, "nll_loss": 0.7693020105361938, "rewards/accuracies": 0.875, "rewards/chosen": -0.18321794271469116, "rewards/margins": 0.13300827145576477, "rewards/rejected": -0.31622621417045593, "step": 1816 }, { "epoch": 4.974674880219028, "grad_norm": 3.2818586826324463, "learning_rate": 7.510958904109589e-07, "log_odds_chosen": 1.5494332313537598, "log_odds_ratio": -0.4197491407394409, "logits/chosen": 0.6483154296875, "logits/rejected": 0.6017193794250488, "logps/chosen": -2.3869566917419434, "logps/rejected": -3.7751665115356445, "loss": 0.7603, "nll_loss": 0.71835857629776, "rewards/accuracies": 0.875, "rewards/chosen": -0.2386956661939621, "rewards/margins": 0.13882096111774445, "rewards/rejected": -0.37751662731170654, "step": 1817 }, { "epoch": 4.97741273100616, "grad_norm": 2.423557758331299, "learning_rate": 7.509589041095889e-07, "log_odds_chosen": 1.9361358880996704, "log_odds_ratio": -0.3038528561592102, "logits/chosen": 0.9429707527160645, "logits/rejected": 0.8940830230712891, "logps/chosen": -2.2347006797790527, "logps/rejected": -4.091241359710693, "loss": 0.7709, "nll_loss": 0.7404853105545044, "rewards/accuracies": 1.0, "rewards/chosen": -0.2234700620174408, "rewards/margins": 0.18565408885478973, "rewards/rejected": -0.40912413597106934, "step": 1818 }, { "epoch": 4.980150581793293, "grad_norm": 2.26071834564209, "learning_rate": 7.508219178082191e-07, "log_odds_chosen": 1.6271593570709229, "log_odds_ratio": -0.25038817524909973, "logits/chosen": 0.7335706353187561, "logits/rejected": 0.6844305396080017, "logps/chosen": -1.6859829425811768, "logps/rejected": -3.154930591583252, "loss": 0.7538, "nll_loss": 0.7287533283233643, "rewards/accuracies": 1.0, "rewards/chosen": -0.16859829425811768, "rewards/margins": 0.14689476788043976, "rewards/rejected": -0.31549307703971863, "step": 1819 }, { "epoch": 4.982888432580424, "grad_norm": 2.789541482925415, "learning_rate": 7.506849315068493e-07, "log_odds_chosen": 2.1472222805023193, "log_odds_ratio": -0.28185904026031494, "logits/chosen": 0.8674973249435425, "logits/rejected": 0.7934801578521729, "logps/chosen": -2.130918025970459, "logps/rejected": -4.178013801574707, "loss": 0.709, "nll_loss": 0.6807641983032227, "rewards/accuracies": 1.0, "rewards/chosen": -0.21309182047843933, "rewards/margins": 0.20470955967903137, "rewards/rejected": -0.4178013801574707, "step": 1820 }, { "epoch": 4.985626283367557, "grad_norm": 2.2432775497436523, "learning_rate": 7.505479452054794e-07, "log_odds_chosen": 1.3029308319091797, "log_odds_ratio": -0.3274195194244385, "logits/chosen": 0.8778582811355591, "logits/rejected": 0.8456747531890869, "logps/chosen": -2.030299663543701, "logps/rejected": -3.202308416366577, "loss": 0.7437, "nll_loss": 0.7109143733978271, "rewards/accuracies": 0.875, "rewards/chosen": -0.20302997529506683, "rewards/margins": 0.11720086634159088, "rewards/rejected": -0.3202308416366577, "step": 1821 }, { "epoch": 4.988364134154689, "grad_norm": 2.8107855319976807, "learning_rate": 7.504109589041095e-07, "log_odds_chosen": 1.6503007411956787, "log_odds_ratio": -0.299793004989624, "logits/chosen": 0.9262900352478027, "logits/rejected": 0.9258607625961304, "logps/chosen": -2.150850534439087, "logps/rejected": -3.6660871505737305, "loss": 0.7225, "nll_loss": 0.6924954056739807, "rewards/accuracies": 0.875, "rewards/chosen": -0.21508505940437317, "rewards/margins": 0.1515236645936966, "rewards/rejected": -0.36660870909690857, "step": 1822 }, { "epoch": 4.991101984941821, "grad_norm": 2.428844451904297, "learning_rate": 7.502739726027397e-07, "log_odds_chosen": 3.5469322204589844, "log_odds_ratio": -0.17473585903644562, "logits/chosen": 0.7024307250976562, "logits/rejected": 0.6296831369400024, "logps/chosen": -2.026244878768921, "logps/rejected": -5.40036678314209, "loss": 0.7637, "nll_loss": 0.7462452054023743, "rewards/accuracies": 1.0, "rewards/chosen": -0.20262449979782104, "rewards/margins": 0.3374122083187103, "rewards/rejected": -0.5400367379188538, "step": 1823 }, { "epoch": 4.993839835728953, "grad_norm": 2.5134294033050537, "learning_rate": 7.501369863013698e-07, "log_odds_chosen": 1.1924039125442505, "log_odds_ratio": -0.31403133273124695, "logits/chosen": 0.6640169620513916, "logits/rejected": 0.6159321665763855, "logps/chosen": -2.1720011234283447, "logps/rejected": -3.271589756011963, "loss": 0.7828, "nll_loss": 0.7513755559921265, "rewards/accuracies": 1.0, "rewards/chosen": -0.2172001153230667, "rewards/margins": 0.10995887964963913, "rewards/rejected": -0.32715898752212524, "step": 1824 }, { "epoch": 4.996577686516085, "grad_norm": 3.4013831615448, "learning_rate": 7.5e-07, "log_odds_chosen": 2.185739517211914, "log_odds_ratio": -0.2999934256076813, "logits/chosen": 0.886702299118042, "logits/rejected": 0.9240964651107788, "logps/chosen": -2.01261830329895, "logps/rejected": -4.055305004119873, "loss": 0.7742, "nll_loss": 0.7441802620887756, "rewards/accuracies": 0.875, "rewards/chosen": -0.20126183331012726, "rewards/margins": 0.2042686641216278, "rewards/rejected": -0.40553051233291626, "step": 1825 }, { "epoch": 4.999315537303217, "grad_norm": 3.446409225463867, "learning_rate": 7.498630136986301e-07, "log_odds_chosen": 1.8418526649475098, "log_odds_ratio": -0.8203193545341492, "logits/chosen": 0.9659011363983154, "logits/rejected": 0.8715794682502747, "logps/chosen": -2.332655191421509, "logps/rejected": -3.950716495513916, "loss": 0.7819, "nll_loss": 0.6998987197875977, "rewards/accuracies": 0.875, "rewards/chosen": -0.23326550424098969, "rewards/margins": 0.1618061363697052, "rewards/rejected": -0.3950716555118561, "step": 1826 }, { "epoch": 5.002053388090349, "grad_norm": 2.1292049884796143, "learning_rate": 7.497260273972602e-07, "log_odds_chosen": 2.058215618133545, "log_odds_ratio": -0.34238648414611816, "logits/chosen": 0.541968047618866, "logits/rejected": 0.48706820607185364, "logps/chosen": -1.8692961931228638, "logps/rejected": -3.8243932723999023, "loss": 0.8085, "nll_loss": 0.7742785215377808, "rewards/accuracies": 1.0, "rewards/chosen": -0.1869296282529831, "rewards/margins": 0.19550973176956177, "rewards/rejected": -0.38243934512138367, "step": 1827 }, { "epoch": 5.004791238877481, "grad_norm": 2.5923287868499756, "learning_rate": 7.495890410958904e-07, "log_odds_chosen": 2.3101868629455566, "log_odds_ratio": -0.3215366005897522, "logits/chosen": 0.9872307777404785, "logits/rejected": 0.9762537479400635, "logps/chosen": -2.0023648738861084, "logps/rejected": -4.134528636932373, "loss": 0.7705, "nll_loss": 0.7383220195770264, "rewards/accuracies": 1.0, "rewards/chosen": -0.2002364993095398, "rewards/margins": 0.2132163643836975, "rewards/rejected": -0.4134528636932373, "step": 1828 }, { "epoch": 5.007529089664613, "grad_norm": 2.4014179706573486, "learning_rate": 7.494520547945205e-07, "log_odds_chosen": 3.2818379402160645, "log_odds_ratio": -0.18405690789222717, "logits/chosen": 0.8267273902893066, "logits/rejected": 0.7911966443061829, "logps/chosen": -1.3930127620697021, "logps/rejected": -4.4033331871032715, "loss": 0.7472, "nll_loss": 0.7288229465484619, "rewards/accuracies": 1.0, "rewards/chosen": -0.13930127024650574, "rewards/margins": 0.30103200674057007, "rewards/rejected": -0.4403333365917206, "step": 1829 }, { "epoch": 5.0102669404517455, "grad_norm": 2.670243978500366, "learning_rate": 7.493150684931506e-07, "log_odds_chosen": 1.767576813697815, "log_odds_ratio": -0.2950003743171692, "logits/chosen": 0.42673808336257935, "logits/rejected": 0.33506515622138977, "logps/chosen": -1.3121296167373657, "logps/rejected": -2.8673386573791504, "loss": 0.8403, "nll_loss": 0.8107934594154358, "rewards/accuracies": 0.875, "rewards/chosen": -0.1312129646539688, "rewards/margins": 0.15552091598510742, "rewards/rejected": -0.28673386573791504, "step": 1830 }, { "epoch": 5.0130047912388775, "grad_norm": 2.4942073822021484, "learning_rate": 7.491780821917808e-07, "log_odds_chosen": 1.817672848701477, "log_odds_ratio": -0.2857142388820648, "logits/chosen": 0.5901303887367249, "logits/rejected": 0.5721211433410645, "logps/chosen": -1.7509843111038208, "logps/rejected": -3.4235904216766357, "loss": 0.8459, "nll_loss": 0.8173558712005615, "rewards/accuracies": 0.875, "rewards/chosen": -0.1750984489917755, "rewards/margins": 0.16726058721542358, "rewards/rejected": -0.3423590064048767, "step": 1831 }, { "epoch": 5.01574264202601, "grad_norm": 5.365031719207764, "learning_rate": 7.49041095890411e-07, "log_odds_chosen": 0.9628040790557861, "log_odds_ratio": -0.8082484006881714, "logits/chosen": 0.6514946818351746, "logits/rejected": 0.5991340279579163, "logps/chosen": -2.230289936065674, "logps/rejected": -3.0836267471313477, "loss": 0.8399, "nll_loss": 0.7591181397438049, "rewards/accuracies": 0.875, "rewards/chosen": -0.2230290025472641, "rewards/margins": 0.0853336900472641, "rewards/rejected": -0.3083626925945282, "step": 1832 }, { "epoch": 5.018480492813142, "grad_norm": 3.2438061237335205, "learning_rate": 7.48904109589041e-07, "log_odds_chosen": 1.7129151821136475, "log_odds_ratio": -0.3779635429382324, "logits/chosen": 0.8292688131332397, "logits/rejected": 0.8182573318481445, "logps/chosen": -3.037491798400879, "logps/rejected": -4.679767608642578, "loss": 0.7874, "nll_loss": 0.7496449947357178, "rewards/accuracies": 0.875, "rewards/chosen": -0.3037492036819458, "rewards/margins": 0.16422760486602783, "rewards/rejected": -0.46797677874565125, "step": 1833 }, { "epoch": 5.021218343600274, "grad_norm": 2.4476444721221924, "learning_rate": 7.487671232876712e-07, "log_odds_chosen": 1.4822492599487305, "log_odds_ratio": -0.3212554454803467, "logits/chosen": 1.0253560543060303, "logits/rejected": 1.0437541007995605, "logps/chosen": -2.1241188049316406, "logps/rejected": -3.490680694580078, "loss": 0.704, "nll_loss": 0.6718448400497437, "rewards/accuracies": 1.0, "rewards/chosen": -0.21241188049316406, "rewards/margins": 0.13665618002414703, "rewards/rejected": -0.3490680456161499, "step": 1834 }, { "epoch": 5.023956194387406, "grad_norm": 5.329412937164307, "learning_rate": 7.486301369863013e-07, "log_odds_chosen": 1.4536211490631104, "log_odds_ratio": -0.47716009616851807, "logits/chosen": 0.7806190252304077, "logits/rejected": 0.729411244392395, "logps/chosen": -2.6265487670898438, "logps/rejected": -4.000816345214844, "loss": 0.8652, "nll_loss": 0.8175109624862671, "rewards/accuracies": 0.875, "rewards/chosen": -0.2626549005508423, "rewards/margins": 0.1374267041683197, "rewards/rejected": -0.4000815749168396, "step": 1835 }, { "epoch": 5.026694045174538, "grad_norm": 2.3036553859710693, "learning_rate": 7.484931506849314e-07, "log_odds_chosen": 1.462348222732544, "log_odds_ratio": -0.3749794363975525, "logits/chosen": 0.7203288674354553, "logits/rejected": 0.717051088809967, "logps/chosen": -1.9044198989868164, "logps/rejected": -3.268918991088867, "loss": 0.769, "nll_loss": 0.7315205335617065, "rewards/accuracies": 0.875, "rewards/chosen": -0.1904420107603073, "rewards/margins": 0.1364499032497406, "rewards/rejected": -0.3268918991088867, "step": 1836 }, { "epoch": 5.02943189596167, "grad_norm": 2.6483378410339355, "learning_rate": 7.483561643835616e-07, "log_odds_chosen": 2.82964825630188, "log_odds_ratio": -0.23986464738845825, "logits/chosen": 0.8582470417022705, "logits/rejected": 0.8262251615524292, "logps/chosen": -2.0369694232940674, "logps/rejected": -4.728384971618652, "loss": 0.7994, "nll_loss": 0.7754219770431519, "rewards/accuracies": 1.0, "rewards/chosen": -0.20369693636894226, "rewards/margins": 0.26914161443710327, "rewards/rejected": -0.47283855080604553, "step": 1837 }, { "epoch": 5.032169746748802, "grad_norm": 2.4332399368286133, "learning_rate": 7.482191780821917e-07, "log_odds_chosen": 2.8335578441619873, "log_odds_ratio": -0.29185670614242554, "logits/chosen": 0.9498745203018188, "logits/rejected": 0.9476138353347778, "logps/chosen": -1.7507028579711914, "logps/rejected": -4.423709392547607, "loss": 0.79, "nll_loss": 0.7608316540718079, "rewards/accuracies": 1.0, "rewards/chosen": -0.17507027089595795, "rewards/margins": 0.26730066537857056, "rewards/rejected": -0.4423709511756897, "step": 1838 }, { "epoch": 5.034907597535934, "grad_norm": 2.5630602836608887, "learning_rate": 7.480821917808218e-07, "log_odds_chosen": 1.158510446548462, "log_odds_ratio": -0.3898903727531433, "logits/chosen": 0.7628055214881897, "logits/rejected": 0.7562524676322937, "logps/chosen": -1.8866820335388184, "logps/rejected": -2.97646427154541, "loss": 0.7672, "nll_loss": 0.7282496690750122, "rewards/accuracies": 0.875, "rewards/chosen": -0.18866819143295288, "rewards/margins": 0.10897823423147202, "rewards/rejected": -0.2976464033126831, "step": 1839 }, { "epoch": 5.037645448323066, "grad_norm": 2.9660205841064453, "learning_rate": 7.47945205479452e-07, "log_odds_chosen": 0.8699740767478943, "log_odds_ratio": -0.4801923632621765, "logits/chosen": 1.215750813484192, "logits/rejected": 1.202343463897705, "logps/chosen": -2.8828845024108887, "logps/rejected": -3.6802072525024414, "loss": 0.7667, "nll_loss": 0.7187056541442871, "rewards/accuracies": 0.75, "rewards/chosen": -0.2882884740829468, "rewards/margins": 0.07973223924636841, "rewards/rejected": -0.3680207133293152, "step": 1840 }, { "epoch": 5.040383299110198, "grad_norm": 4.201040267944336, "learning_rate": 7.478082191780821e-07, "log_odds_chosen": 2.072807550430298, "log_odds_ratio": -0.2463618814945221, "logits/chosen": 0.7439273595809937, "logits/rejected": 0.7237780094146729, "logps/chosen": -2.3679184913635254, "logps/rejected": -4.310836315155029, "loss": 0.9177, "nll_loss": 0.8930858969688416, "rewards/accuracies": 1.0, "rewards/chosen": -0.23679186403751373, "rewards/margins": 0.19429177045822144, "rewards/rejected": -0.431083619594574, "step": 1841 }, { "epoch": 5.04312114989733, "grad_norm": 2.9319496154785156, "learning_rate": 7.476712328767123e-07, "log_odds_chosen": 1.092612385749817, "log_odds_ratio": -0.4422815442085266, "logits/chosen": 0.7961905002593994, "logits/rejected": 0.7749065160751343, "logps/chosen": -2.795999050140381, "logps/rejected": -3.8266186714172363, "loss": 0.8308, "nll_loss": 0.786541223526001, "rewards/accuracies": 0.875, "rewards/chosen": -0.2795999050140381, "rewards/margins": 0.10306194424629211, "rewards/rejected": -0.3826618492603302, "step": 1842 }, { "epoch": 5.045859000684462, "grad_norm": 2.034423589706421, "learning_rate": 7.475342465753424e-07, "log_odds_chosen": 4.643048286437988, "log_odds_ratio": -0.06509815901517868, "logits/chosen": 0.8304201364517212, "logits/rejected": 0.7900599837303162, "logps/chosen": -1.8941268920898438, "logps/rejected": -6.308211803436279, "loss": 0.7615, "nll_loss": 0.7549766302108765, "rewards/accuracies": 1.0, "rewards/chosen": -0.1894126981496811, "rewards/margins": 0.44140851497650146, "rewards/rejected": -0.6308212280273438, "step": 1843 }, { "epoch": 5.048596851471594, "grad_norm": 2.5046794414520264, "learning_rate": 7.473972602739725e-07, "log_odds_chosen": 2.5832619667053223, "log_odds_ratio": -0.23968392610549927, "logits/chosen": 0.8651726245880127, "logits/rejected": 0.8259950876235962, "logps/chosen": -2.9393014907836914, "logps/rejected": -5.371975421905518, "loss": 0.8334, "nll_loss": 0.8093987703323364, "rewards/accuracies": 0.75, "rewards/chosen": -0.2939301133155823, "rewards/margins": 0.24326743185520172, "rewards/rejected": -0.5371975898742676, "step": 1844 }, { "epoch": 5.0513347022587265, "grad_norm": 3.590475082397461, "learning_rate": 7.472602739726027e-07, "log_odds_chosen": 1.4400262832641602, "log_odds_ratio": -0.6280423998832703, "logits/chosen": 0.956902265548706, "logits/rejected": 0.9419177174568176, "logps/chosen": -2.5838756561279297, "logps/rejected": -3.902393341064453, "loss": 0.7981, "nll_loss": 0.7352609038352966, "rewards/accuracies": 0.5, "rewards/chosen": -0.25838756561279297, "rewards/margins": 0.13185177743434906, "rewards/rejected": -0.39023932814598083, "step": 1845 }, { "epoch": 5.054072553045859, "grad_norm": 4.094518661499023, "learning_rate": 7.471232876712328e-07, "log_odds_chosen": 1.6273354291915894, "log_odds_ratio": -0.3820542097091675, "logits/chosen": 1.0319536924362183, "logits/rejected": 1.0442991256713867, "logps/chosen": -2.2042322158813477, "logps/rejected": -3.705397129058838, "loss": 0.7728, "nll_loss": 0.7346058487892151, "rewards/accuracies": 0.75, "rewards/chosen": -0.22042322158813477, "rewards/margins": 0.15011650323867798, "rewards/rejected": -0.37053972482681274, "step": 1846 }, { "epoch": 5.0568104038329915, "grad_norm": 3.039016008377075, "learning_rate": 7.469863013698629e-07, "log_odds_chosen": 2.8668370246887207, "log_odds_ratio": -0.1913500279188156, "logits/chosen": 0.7588675618171692, "logits/rejected": 0.7536637783050537, "logps/chosen": -1.8235228061676025, "logps/rejected": -4.4800872802734375, "loss": 0.7653, "nll_loss": 0.7461631298065186, "rewards/accuracies": 1.0, "rewards/chosen": -0.18235228955745697, "rewards/margins": 0.265656441450119, "rewards/rejected": -0.4480087161064148, "step": 1847 }, { "epoch": 5.059548254620124, "grad_norm": 5.793539047241211, "learning_rate": 7.468493150684931e-07, "log_odds_chosen": 0.12003850936889648, "log_odds_ratio": -0.8495649099349976, "logits/chosen": 0.7840549349784851, "logits/rejected": 0.7548342347145081, "logps/chosen": -2.5871407985687256, "logps/rejected": -2.6940927505493164, "loss": 0.8528, "nll_loss": 0.7678123712539673, "rewards/accuracies": 0.625, "rewards/chosen": -0.25871410965919495, "rewards/margins": 0.010695166885852814, "rewards/rejected": -0.26940926909446716, "step": 1848 }, { "epoch": 5.062286105407256, "grad_norm": 3.752354145050049, "learning_rate": 7.467123287671232e-07, "log_odds_chosen": 2.6763625144958496, "log_odds_ratio": -0.7990717887878418, "logits/chosen": 1.126122236251831, "logits/rejected": 1.1713597774505615, "logps/chosen": -3.421809196472168, "logps/rejected": -6.001793384552002, "loss": 0.7809, "nll_loss": 0.7009989619255066, "rewards/accuracies": 0.875, "rewards/chosen": -0.34218090772628784, "rewards/margins": 0.2579984664916992, "rewards/rejected": -0.6001793742179871, "step": 1849 }, { "epoch": 5.065023956194388, "grad_norm": 2.738877296447754, "learning_rate": 7.465753424657533e-07, "log_odds_chosen": 1.9258121252059937, "log_odds_ratio": -0.4958805441856384, "logits/chosen": 0.8858081102371216, "logits/rejected": 0.805970311164856, "logps/chosen": -1.8365741968154907, "logps/rejected": -3.6770739555358887, "loss": 0.794, "nll_loss": 0.744434654712677, "rewards/accuracies": 0.625, "rewards/chosen": -0.1836574375629425, "rewards/margins": 0.18404997885227203, "rewards/rejected": -0.36770740151405334, "step": 1850 }, { "epoch": 5.06776180698152, "grad_norm": 3.319103240966797, "learning_rate": 7.464383561643835e-07, "log_odds_chosen": 1.9272500276565552, "log_odds_ratio": -0.3732202649116516, "logits/chosen": 0.7937363386154175, "logits/rejected": 0.7746055126190186, "logps/chosen": -2.767545461654663, "logps/rejected": -4.6088643074035645, "loss": 0.8647, "nll_loss": 0.8274224996566772, "rewards/accuracies": 0.75, "rewards/chosen": -0.2767545282840729, "rewards/margins": 0.18413189053535461, "rewards/rejected": -0.4608864188194275, "step": 1851 }, { "epoch": 5.070499657768652, "grad_norm": 2.5282630920410156, "learning_rate": 7.463013698630136e-07, "log_odds_chosen": 1.8971163034439087, "log_odds_ratio": -0.26946762204170227, "logits/chosen": 0.9897031784057617, "logits/rejected": 1.029072642326355, "logps/chosen": -1.8632943630218506, "logps/rejected": -3.5585532188415527, "loss": 0.7014, "nll_loss": 0.6744210124015808, "rewards/accuracies": 0.875, "rewards/chosen": -0.1863294243812561, "rewards/margins": 0.1695258915424347, "rewards/rejected": -0.3558553159236908, "step": 1852 }, { "epoch": 5.073237508555784, "grad_norm": 2.467402219772339, "learning_rate": 7.461643835616437e-07, "log_odds_chosen": 1.8455719947814941, "log_odds_ratio": -0.26133450865745544, "logits/chosen": 0.7308624386787415, "logits/rejected": 0.7285088300704956, "logps/chosen": -2.3575570583343506, "logps/rejected": -4.104789733886719, "loss": 0.8412, "nll_loss": 0.8150984644889832, "rewards/accuracies": 1.0, "rewards/chosen": -0.23575571179389954, "rewards/margins": 0.17472326755523682, "rewards/rejected": -0.41047897934913635, "step": 1853 }, { "epoch": 5.075975359342916, "grad_norm": 2.549896478652954, "learning_rate": 7.46027397260274e-07, "log_odds_chosen": 1.3232722282409668, "log_odds_ratio": -0.3515109121799469, "logits/chosen": 0.7214624881744385, "logits/rejected": 0.7791963815689087, "logps/chosen": -1.898834466934204, "logps/rejected": -3.1309378147125244, "loss": 0.8141, "nll_loss": 0.7789046168327332, "rewards/accuracies": 0.875, "rewards/chosen": -0.18988344073295593, "rewards/margins": 0.1232103556394577, "rewards/rejected": -0.31309378147125244, "step": 1854 }, { "epoch": 5.078713210130048, "grad_norm": 2.743701219558716, "learning_rate": 7.45890410958904e-07, "log_odds_chosen": 1.8614132404327393, "log_odds_ratio": -0.3010859489440918, "logits/chosen": 0.9352229833602905, "logits/rejected": 0.9082017540931702, "logps/chosen": -1.9868398904800415, "logps/rejected": -3.724764347076416, "loss": 0.7836, "nll_loss": 0.7534517049789429, "rewards/accuracies": 0.875, "rewards/chosen": -0.19868400692939758, "rewards/margins": 0.17379240691661835, "rewards/rejected": -0.3724764287471771, "step": 1855 }, { "epoch": 5.08145106091718, "grad_norm": 2.814160108566284, "learning_rate": 7.457534246575343e-07, "log_odds_chosen": 1.406056523323059, "log_odds_ratio": -0.38967663049697876, "logits/chosen": 0.7072105407714844, "logits/rejected": 0.7008631229400635, "logps/chosen": -1.864931583404541, "logps/rejected": -3.175553321838379, "loss": 0.8719, "nll_loss": 0.8328987956047058, "rewards/accuracies": 0.875, "rewards/chosen": -0.1864931732416153, "rewards/margins": 0.13106215000152588, "rewards/rejected": -0.31755533814430237, "step": 1856 }, { "epoch": 5.084188911704312, "grad_norm": 3.0204756259918213, "learning_rate": 7.456164383561644e-07, "log_odds_chosen": 1.2199277877807617, "log_odds_ratio": -0.4388078451156616, "logits/chosen": 0.9356094598770142, "logits/rejected": 0.9374633431434631, "logps/chosen": -2.1464943885803223, "logps/rejected": -3.229288339614868, "loss": 0.7902, "nll_loss": 0.7463161945343018, "rewards/accuracies": 0.875, "rewards/chosen": -0.21464943885803223, "rewards/margins": 0.10827940702438354, "rewards/rejected": -0.32292884588241577, "step": 1857 }, { "epoch": 5.086926762491444, "grad_norm": 3.9983432292938232, "learning_rate": 7.454794520547945e-07, "log_odds_chosen": 0.6958967447280884, "log_odds_ratio": -0.6066694855690002, "logits/chosen": 0.7995589971542358, "logits/rejected": 0.7463206052780151, "logps/chosen": -2.08976411819458, "logps/rejected": -2.6822047233581543, "loss": 0.8356, "nll_loss": 0.7749617099761963, "rewards/accuracies": 0.625, "rewards/chosen": -0.20897641777992249, "rewards/margins": 0.0592440664768219, "rewards/rejected": -0.2682204842567444, "step": 1858 }, { "epoch": 5.089664613278576, "grad_norm": 2.497436046600342, "learning_rate": 7.453424657534247e-07, "log_odds_chosen": 1.9241037368774414, "log_odds_ratio": -0.21795450150966644, "logits/chosen": 0.9663122892379761, "logits/rejected": 0.9977999329566956, "logps/chosen": -2.2688684463500977, "logps/rejected": -4.110288619995117, "loss": 0.7545, "nll_loss": 0.732719898223877, "rewards/accuracies": 1.0, "rewards/chosen": -0.22688686847686768, "rewards/margins": 0.18414199352264404, "rewards/rejected": -0.4110288619995117, "step": 1859 }, { "epoch": 5.092402464065708, "grad_norm": 2.2726807594299316, "learning_rate": 7.452054794520548e-07, "log_odds_chosen": 3.154531955718994, "log_odds_ratio": -0.25232434272766113, "logits/chosen": 0.9183655977249146, "logits/rejected": 0.9216046333312988, "logps/chosen": -1.8815208673477173, "logps/rejected": -4.820564270019531, "loss": 0.7809, "nll_loss": 0.7556697130203247, "rewards/accuracies": 0.875, "rewards/chosen": -0.18815210461616516, "rewards/margins": 0.2939043343067169, "rewards/rejected": -0.4820564091205597, "step": 1860 }, { "epoch": 5.0951403148528405, "grad_norm": 3.530883550643921, "learning_rate": 7.450684931506849e-07, "log_odds_chosen": 0.7190248370170593, "log_odds_ratio": -0.5619794726371765, "logits/chosen": 0.7110005617141724, "logits/rejected": 0.6682146787643433, "logps/chosen": -3.545647621154785, "logps/rejected": -4.2365946769714355, "loss": 0.8551, "nll_loss": 0.7988759279251099, "rewards/accuracies": 0.625, "rewards/chosen": -0.3545647859573364, "rewards/margins": 0.06909467279911041, "rewards/rejected": -0.42365947365760803, "step": 1861 }, { "epoch": 5.0978781656399725, "grad_norm": 2.4265031814575195, "learning_rate": 7.449315068493151e-07, "log_odds_chosen": 3.5979700088500977, "log_odds_ratio": -0.0676141083240509, "logits/chosen": 1.1182528734207153, "logits/rejected": 1.2098969221115112, "logps/chosen": -2.1527011394500732, "logps/rejected": -5.606542587280273, "loss": 0.6998, "nll_loss": 0.6930034160614014, "rewards/accuracies": 1.0, "rewards/chosen": -0.21527013182640076, "rewards/margins": 0.3453841507434845, "rewards/rejected": -0.5606542825698853, "step": 1862 }, { "epoch": 5.100616016427105, "grad_norm": 2.264207124710083, "learning_rate": 7.447945205479453e-07, "log_odds_chosen": 2.587474822998047, "log_odds_ratio": -0.24022315442562103, "logits/chosen": 0.7153505086898804, "logits/rejected": 0.721889853477478, "logps/chosen": -1.933430552482605, "logps/rejected": -4.399825096130371, "loss": 0.7111, "nll_loss": 0.6870452761650085, "rewards/accuracies": 0.875, "rewards/chosen": -0.19334304332733154, "rewards/margins": 0.2466394603252411, "rewards/rejected": -0.439982533454895, "step": 1863 }, { "epoch": 5.103353867214237, "grad_norm": 2.628434658050537, "learning_rate": 7.446575342465753e-07, "log_odds_chosen": 2.290688991546631, "log_odds_ratio": -0.2844262421131134, "logits/chosen": 0.4979134500026703, "logits/rejected": 0.43602997064590454, "logps/chosen": -2.342560291290283, "logps/rejected": -4.492664813995361, "loss": 0.7779, "nll_loss": 0.7494949102401733, "rewards/accuracies": 0.875, "rewards/chosen": -0.23425604403018951, "rewards/margins": 0.21501044929027557, "rewards/rejected": -0.4492664933204651, "step": 1864 }, { "epoch": 5.106091718001369, "grad_norm": 2.372128963470459, "learning_rate": 7.445205479452055e-07, "log_odds_chosen": 2.382516384124756, "log_odds_ratio": -0.21922339498996735, "logits/chosen": 0.9081313610076904, "logits/rejected": 0.8924444913864136, "logps/chosen": -1.673319697380066, "logps/rejected": -3.886503219604492, "loss": 0.7484, "nll_loss": 0.7265027165412903, "rewards/accuracies": 1.0, "rewards/chosen": -0.1673319786787033, "rewards/margins": 0.2213183492422104, "rewards/rejected": -0.3886503577232361, "step": 1865 }, { "epoch": 5.108829568788501, "grad_norm": 2.40346622467041, "learning_rate": 7.443835616438356e-07, "log_odds_chosen": 1.0907580852508545, "log_odds_ratio": -0.323922336101532, "logits/chosen": 0.920150637626648, "logits/rejected": 0.8943449854850769, "logps/chosen": -1.7490475177764893, "logps/rejected": -2.691871166229248, "loss": 0.6911, "nll_loss": 0.6587258577346802, "rewards/accuracies": 1.0, "rewards/chosen": -0.1749047487974167, "rewards/margins": 0.0942823588848114, "rewards/rejected": -0.2691870927810669, "step": 1866 }, { "epoch": 5.111567419575633, "grad_norm": 2.2803738117218018, "learning_rate": 7.442465753424657e-07, "log_odds_chosen": 1.5960414409637451, "log_odds_ratio": -0.280283123254776, "logits/chosen": 0.9079105854034424, "logits/rejected": 0.8520791530609131, "logps/chosen": -1.5556700229644775, "logps/rejected": -2.977304458618164, "loss": 0.7321, "nll_loss": 0.7040957808494568, "rewards/accuracies": 1.0, "rewards/chosen": -0.1555669903755188, "rewards/margins": 0.1421634405851364, "rewards/rejected": -0.2977304458618164, "step": 1867 }, { "epoch": 5.114305270362765, "grad_norm": 2.4423720836639404, "learning_rate": 7.441095890410959e-07, "log_odds_chosen": 2.685004949569702, "log_odds_ratio": -0.23188775777816772, "logits/chosen": 0.874982476234436, "logits/rejected": 0.8844479322433472, "logps/chosen": -1.664372205734253, "logps/rejected": -4.177992820739746, "loss": 0.7405, "nll_loss": 0.7172847986221313, "rewards/accuracies": 1.0, "rewards/chosen": -0.16643720865249634, "rewards/margins": 0.25136205554008484, "rewards/rejected": -0.4177992343902588, "step": 1868 }, { "epoch": 5.117043121149897, "grad_norm": 2.2324867248535156, "learning_rate": 7.43972602739726e-07, "log_odds_chosen": 1.592147946357727, "log_odds_ratio": -0.3219042122364044, "logits/chosen": 0.9487059712409973, "logits/rejected": 0.9347022175788879, "logps/chosen": -1.9399192333221436, "logps/rejected": -3.3590588569641113, "loss": 0.7279, "nll_loss": 0.6957513093948364, "rewards/accuracies": 0.875, "rewards/chosen": -0.19399192929267883, "rewards/margins": 0.1419139802455902, "rewards/rejected": -0.33590590953826904, "step": 1869 }, { "epoch": 5.119780971937029, "grad_norm": 2.682201623916626, "learning_rate": 7.438356164383562e-07, "log_odds_chosen": 2.6736178398132324, "log_odds_ratio": -0.2535780668258667, "logits/chosen": 0.6193608045578003, "logits/rejected": 0.5449752807617188, "logps/chosen": -1.9968793392181396, "logps/rejected": -4.542247772216797, "loss": 0.8329, "nll_loss": 0.8075229525566101, "rewards/accuracies": 1.0, "rewards/chosen": -0.19968795776367188, "rewards/margins": 0.25453686714172363, "rewards/rejected": -0.4542248547077179, "step": 1870 }, { "epoch": 5.122518822724161, "grad_norm": 4.172736167907715, "learning_rate": 7.436986301369863e-07, "log_odds_chosen": 1.2610645294189453, "log_odds_ratio": -0.49374669790267944, "logits/chosen": 0.6757463812828064, "logits/rejected": 0.5835142731666565, "logps/chosen": -2.1446683406829834, "logps/rejected": -3.2519235610961914, "loss": 0.9139, "nll_loss": 0.8645326495170593, "rewards/accuracies": 0.875, "rewards/chosen": -0.21446684002876282, "rewards/margins": 0.1107255220413208, "rewards/rejected": -0.3251923620700836, "step": 1871 }, { "epoch": 5.125256673511293, "grad_norm": 2.358905553817749, "learning_rate": 7.435616438356164e-07, "log_odds_chosen": 2.0773792266845703, "log_odds_ratio": -0.1674351543188095, "logits/chosen": 0.7979813814163208, "logits/rejected": 0.7305139303207397, "logps/chosen": -2.3748862743377686, "logps/rejected": -4.302833080291748, "loss": 0.8131, "nll_loss": 0.796328067779541, "rewards/accuracies": 1.0, "rewards/chosen": -0.23748862743377686, "rewards/margins": 0.19279468059539795, "rewards/rejected": -0.4302833080291748, "step": 1872 }, { "epoch": 5.127994524298426, "grad_norm": 2.566237211227417, "learning_rate": 7.434246575342466e-07, "log_odds_chosen": 1.5763092041015625, "log_odds_ratio": -0.24069565534591675, "logits/chosen": 0.689937949180603, "logits/rejected": 0.6716222763061523, "logps/chosen": -2.070878744125366, "logps/rejected": -3.509322166442871, "loss": 0.7482, "nll_loss": 0.7241348028182983, "rewards/accuracies": 1.0, "rewards/chosen": -0.20708787441253662, "rewards/margins": 0.143844336271286, "rewards/rejected": -0.350932240486145, "step": 1873 }, { "epoch": 5.130732375085558, "grad_norm": 2.271010637283325, "learning_rate": 7.432876712328767e-07, "log_odds_chosen": 0.6727263927459717, "log_odds_ratio": -0.571556568145752, "logits/chosen": 0.8507658243179321, "logits/rejected": 0.8678326606750488, "logps/chosen": -2.160155773162842, "logps/rejected": -2.786646604537964, "loss": 0.8566, "nll_loss": 0.7994909286499023, "rewards/accuracies": 0.625, "rewards/chosen": -0.21601556241512299, "rewards/margins": 0.06264910846948624, "rewards/rejected": -0.27866464853286743, "step": 1874 }, { "epoch": 5.13347022587269, "grad_norm": 2.3487284183502197, "learning_rate": 7.431506849315068e-07, "log_odds_chosen": 1.7270734310150146, "log_odds_ratio": -0.31081268191337585, "logits/chosen": 0.6065278649330139, "logits/rejected": 0.5539295673370361, "logps/chosen": -1.8934510946273804, "logps/rejected": -3.4535837173461914, "loss": 0.8717, "nll_loss": 0.8405790328979492, "rewards/accuracies": 0.875, "rewards/chosen": -0.1893451064825058, "rewards/margins": 0.15601326525211334, "rewards/rejected": -0.34535837173461914, "step": 1875 }, { "epoch": 5.136208076659822, "grad_norm": 3.405575752258301, "learning_rate": 7.43013698630137e-07, "log_odds_chosen": 2.3114285469055176, "log_odds_ratio": -0.24065019190311432, "logits/chosen": 1.0524623394012451, "logits/rejected": 1.0341962575912476, "logps/chosen": -2.879108190536499, "logps/rejected": -5.092921733856201, "loss": 0.7542, "nll_loss": 0.7301123738288879, "rewards/accuracies": 1.0, "rewards/chosen": -0.2879108488559723, "rewards/margins": 0.22138135135173798, "rewards/rejected": -0.5092921853065491, "step": 1876 }, { "epoch": 5.138945927446954, "grad_norm": 4.38960599899292, "learning_rate": 7.428767123287672e-07, "log_odds_chosen": 0.648294985294342, "log_odds_ratio": -0.570819616317749, "logits/chosen": 0.803874135017395, "logits/rejected": 0.7399413585662842, "logps/chosen": -2.15909481048584, "logps/rejected": -2.7123336791992188, "loss": 0.822, "nll_loss": 0.7648860216140747, "rewards/accuracies": 0.75, "rewards/chosen": -0.21590949594974518, "rewards/margins": 0.05532389506697655, "rewards/rejected": -0.27123337984085083, "step": 1877 }, { "epoch": 5.1416837782340865, "grad_norm": 2.766071081161499, "learning_rate": 7.427397260273972e-07, "log_odds_chosen": 2.108250617980957, "log_odds_ratio": -0.18364217877388, "logits/chosen": 0.9366859197616577, "logits/rejected": 0.900458037853241, "logps/chosen": -2.313904285430908, "logps/rejected": -4.305307388305664, "loss": 0.7628, "nll_loss": 0.7444454431533813, "rewards/accuracies": 1.0, "rewards/chosen": -0.23139041662216187, "rewards/margins": 0.19914031028747559, "rewards/rejected": -0.43053072690963745, "step": 1878 }, { "epoch": 5.144421629021219, "grad_norm": 2.818591833114624, "learning_rate": 7.426027397260274e-07, "log_odds_chosen": 1.9198834896087646, "log_odds_ratio": -0.3144164979457855, "logits/chosen": 0.6830695271492004, "logits/rejected": 0.6544544100761414, "logps/chosen": -2.088931083679199, "logps/rejected": -3.9162564277648926, "loss": 0.7618, "nll_loss": 0.7303447723388672, "rewards/accuracies": 0.875, "rewards/chosen": -0.20889310538768768, "rewards/margins": 0.18273253738880157, "rewards/rejected": -0.39162564277648926, "step": 1879 }, { "epoch": 5.147159479808351, "grad_norm": 3.2476704120635986, "learning_rate": 7.424657534246575e-07, "log_odds_chosen": 2.1881394386291504, "log_odds_ratio": -0.5072773694992065, "logits/chosen": 0.739805281162262, "logits/rejected": 0.7392911911010742, "logps/chosen": -3.494904041290283, "logps/rejected": -5.591188907623291, "loss": 0.8645, "nll_loss": 0.8138068318367004, "rewards/accuracies": 0.875, "rewards/chosen": -0.3494904041290283, "rewards/margins": 0.20962849259376526, "rewards/rejected": -0.5591188669204712, "step": 1880 }, { "epoch": 5.149897330595483, "grad_norm": 2.177128314971924, "learning_rate": 7.423287671232876e-07, "log_odds_chosen": 1.5001590251922607, "log_odds_ratio": -0.26479682326316833, "logits/chosen": 0.9025194048881531, "logits/rejected": 0.904016375541687, "logps/chosen": -1.9301347732543945, "logps/rejected": -3.2875258922576904, "loss": 0.6901, "nll_loss": 0.6635945439338684, "rewards/accuracies": 1.0, "rewards/chosen": -0.1930134892463684, "rewards/margins": 0.13573913276195526, "rewards/rejected": -0.3287526071071625, "step": 1881 }, { "epoch": 5.152635181382615, "grad_norm": 2.6657180786132812, "learning_rate": 7.421917808219178e-07, "log_odds_chosen": 2.2645742893218994, "log_odds_ratio": -0.21851184964179993, "logits/chosen": 0.7012708187103271, "logits/rejected": 0.6904644966125488, "logps/chosen": -2.196446180343628, "logps/rejected": -4.339534759521484, "loss": 0.8581, "nll_loss": 0.8362758159637451, "rewards/accuracies": 1.0, "rewards/chosen": -0.21964463591575623, "rewards/margins": 0.21430887281894684, "rewards/rejected": -0.43395349383354187, "step": 1882 }, { "epoch": 5.155373032169747, "grad_norm": 2.7602386474609375, "learning_rate": 7.420547945205479e-07, "log_odds_chosen": 3.798914909362793, "log_odds_ratio": -0.17071032524108887, "logits/chosen": 0.8203669786453247, "logits/rejected": 0.8161133527755737, "logps/chosen": -2.5605664253234863, "logps/rejected": -6.161721706390381, "loss": 0.729, "nll_loss": 0.7119192481040955, "rewards/accuracies": 1.0, "rewards/chosen": -0.25605666637420654, "rewards/margins": 0.36011552810668945, "rewards/rejected": -0.616172194480896, "step": 1883 }, { "epoch": 5.158110882956879, "grad_norm": 2.667149782180786, "learning_rate": 7.419178082191781e-07, "log_odds_chosen": 1.98054838180542, "log_odds_ratio": -0.17832669615745544, "logits/chosen": 0.6896505355834961, "logits/rejected": 0.6734850406646729, "logps/chosen": -1.8324905633926392, "logps/rejected": -3.6673789024353027, "loss": 0.701, "nll_loss": 0.6831656098365784, "rewards/accuracies": 1.0, "rewards/chosen": -0.1832490712404251, "rewards/margins": 0.18348883092403412, "rewards/rejected": -0.36673790216445923, "step": 1884 }, { "epoch": 5.160848733744011, "grad_norm": 2.753068685531616, "learning_rate": 7.417808219178082e-07, "log_odds_chosen": 1.214908480644226, "log_odds_ratio": -0.35032975673675537, "logits/chosen": 0.758185863494873, "logits/rejected": 0.7331575751304626, "logps/chosen": -1.5046387910842896, "logps/rejected": -2.587608575820923, "loss": 0.748, "nll_loss": 0.7129663825035095, "rewards/accuracies": 1.0, "rewards/chosen": -0.15046387910842896, "rewards/margins": 0.10829699039459229, "rewards/rejected": -0.25876086950302124, "step": 1885 }, { "epoch": 5.163586584531143, "grad_norm": 2.6640524864196777, "learning_rate": 7.416438356164383e-07, "log_odds_chosen": 2.1732122898101807, "log_odds_ratio": -0.27530908584594727, "logits/chosen": 0.8953545093536377, "logits/rejected": 0.900448203086853, "logps/chosen": -2.3599703311920166, "logps/rejected": -4.432354927062988, "loss": 0.7513, "nll_loss": 0.7237982749938965, "rewards/accuracies": 0.75, "rewards/chosen": -0.2359970360994339, "rewards/margins": 0.20723846554756165, "rewards/rejected": -0.44323551654815674, "step": 1886 }, { "epoch": 5.166324435318275, "grad_norm": 2.1657021045684814, "learning_rate": 7.415068493150685e-07, "log_odds_chosen": 1.9211444854736328, "log_odds_ratio": -0.30022040009498596, "logits/chosen": 0.7477256059646606, "logits/rejected": 0.7321511507034302, "logps/chosen": -1.750131368637085, "logps/rejected": -3.5393903255462646, "loss": 0.7245, "nll_loss": 0.6945188641548157, "rewards/accuracies": 0.875, "rewards/chosen": -0.17501315474510193, "rewards/margins": 0.17892590165138245, "rewards/rejected": -0.3539390563964844, "step": 1887 }, { "epoch": 5.169062286105407, "grad_norm": 4.483089447021484, "learning_rate": 7.413698630136986e-07, "log_odds_chosen": 1.2275875806808472, "log_odds_ratio": -0.5400864481925964, "logits/chosen": 0.8952027559280396, "logits/rejected": 0.8318414688110352, "logps/chosen": -2.4391870498657227, "logps/rejected": -3.648986339569092, "loss": 0.7665, "nll_loss": 0.7124587297439575, "rewards/accuracies": 0.875, "rewards/chosen": -0.24391868710517883, "rewards/margins": 0.12097994983196259, "rewards/rejected": -0.3648986518383026, "step": 1888 }, { "epoch": 5.171800136892539, "grad_norm": 4.052071571350098, "learning_rate": 7.412328767123287e-07, "log_odds_chosen": 0.6054846048355103, "log_odds_ratio": -0.5631638765335083, "logits/chosen": 0.8606969714164734, "logits/rejected": 0.7684991955757141, "logps/chosen": -2.338953971862793, "logps/rejected": -2.856339693069458, "loss": 0.78, "nll_loss": 0.7236891984939575, "rewards/accuracies": 0.75, "rewards/chosen": -0.2338954210281372, "rewards/margins": 0.05173856392502785, "rewards/rejected": -0.28563398122787476, "step": 1889 }, { "epoch": 5.174537987679671, "grad_norm": 2.143949031829834, "learning_rate": 7.410958904109589e-07, "log_odds_chosen": 2.8381786346435547, "log_odds_ratio": -0.16391262412071228, "logits/chosen": 0.7731746435165405, "logits/rejected": 0.7341316342353821, "logps/chosen": -1.9807401895523071, "logps/rejected": -4.646079063415527, "loss": 0.7646, "nll_loss": 0.7482278347015381, "rewards/accuracies": 1.0, "rewards/chosen": -0.19807402789592743, "rewards/margins": 0.26653388142585754, "rewards/rejected": -0.46460792422294617, "step": 1890 }, { "epoch": 5.177275838466803, "grad_norm": 3.209467649459839, "learning_rate": 7.409589041095891e-07, "log_odds_chosen": 1.2429941892623901, "log_odds_ratio": -0.36346837878227234, "logits/chosen": 1.0635566711425781, "logits/rejected": 1.0561375617980957, "logps/chosen": -1.9755406379699707, "logps/rejected": -3.050109386444092, "loss": 0.6872, "nll_loss": 0.6508815884590149, "rewards/accuracies": 0.875, "rewards/chosen": -0.19755405187606812, "rewards/margins": 0.10745687782764435, "rewards/rejected": -0.30501094460487366, "step": 1891 }, { "epoch": 5.1800136892539355, "grad_norm": 2.1895151138305664, "learning_rate": 7.408219178082191e-07, "log_odds_chosen": 2.5041344165802, "log_odds_ratio": -0.175834059715271, "logits/chosen": 0.852344274520874, "logits/rejected": 0.8196236491203308, "logps/chosen": -2.225039482116699, "logps/rejected": -4.603261470794678, "loss": 0.7471, "nll_loss": 0.7295042872428894, "rewards/accuracies": 1.0, "rewards/chosen": -0.22250394523143768, "rewards/margins": 0.23782220482826233, "rewards/rejected": -0.4603261351585388, "step": 1892 }, { "epoch": 5.1827515400410675, "grad_norm": 3.7917520999908447, "learning_rate": 7.406849315068493e-07, "log_odds_chosen": 1.5476012229919434, "log_odds_ratio": -0.5667366981506348, "logits/chosen": 1.0464000701904297, "logits/rejected": 1.0576832294464111, "logps/chosen": -3.2080845832824707, "logps/rejected": -4.691198825836182, "loss": 0.786, "nll_loss": 0.7293720245361328, "rewards/accuracies": 0.875, "rewards/chosen": -0.320808470249176, "rewards/margins": 0.14831145107746124, "rewards/rejected": -0.4691199064254761, "step": 1893 }, { "epoch": 5.1854893908282, "grad_norm": 2.788405656814575, "learning_rate": 7.405479452054795e-07, "log_odds_chosen": 0.9617873430252075, "log_odds_ratio": -0.44899797439575195, "logits/chosen": 0.5166485905647278, "logits/rejected": 0.49656403064727783, "logps/chosen": -1.9388370513916016, "logps/rejected": -2.7832813262939453, "loss": 0.8528, "nll_loss": 0.8078787326812744, "rewards/accuracies": 0.875, "rewards/chosen": -0.1938837170600891, "rewards/margins": 0.08444438874721527, "rewards/rejected": -0.2783281207084656, "step": 1894 }, { "epoch": 5.188227241615332, "grad_norm": 2.382157564163208, "learning_rate": 7.404109589041095e-07, "log_odds_chosen": 2.4427154064178467, "log_odds_ratio": -0.26332777738571167, "logits/chosen": 0.890979528427124, "logits/rejected": 0.8698557019233704, "logps/chosen": -1.7453663349151611, "logps/rejected": -4.05305814743042, "loss": 0.8153, "nll_loss": 0.789007306098938, "rewards/accuracies": 1.0, "rewards/chosen": -0.17453661561012268, "rewards/margins": 0.23076917231082916, "rewards/rejected": -0.40530580282211304, "step": 1895 }, { "epoch": 5.190965092402464, "grad_norm": 3.076681137084961, "learning_rate": 7.402739726027397e-07, "log_odds_chosen": 2.7915608882904053, "log_odds_ratio": -0.3951926529407501, "logits/chosen": 0.6592344641685486, "logits/rejected": 0.7182623147964478, "logps/chosen": -2.7178776264190674, "logps/rejected": -5.429393291473389, "loss": 0.8962, "nll_loss": 0.8566991090774536, "rewards/accuracies": 0.875, "rewards/chosen": -0.27178773283958435, "rewards/margins": 0.2711516320705414, "rewards/rejected": -0.5429393649101257, "step": 1896 }, { "epoch": 5.193702943189596, "grad_norm": 4.284231662750244, "learning_rate": 7.401369863013698e-07, "log_odds_chosen": 3.2612247467041016, "log_odds_ratio": -0.22386349737644196, "logits/chosen": 1.092301845550537, "logits/rejected": 1.0604819059371948, "logps/chosen": -2.7328877449035645, "logps/rejected": -5.884678840637207, "loss": 0.7702, "nll_loss": 0.7478034496307373, "rewards/accuracies": 0.875, "rewards/chosen": -0.2732887864112854, "rewards/margins": 0.31517907977104187, "rewards/rejected": -0.5884678959846497, "step": 1897 }, { "epoch": 5.196440793976728, "grad_norm": 2.294651985168457, "learning_rate": 7.4e-07, "log_odds_chosen": 2.674109935760498, "log_odds_ratio": -0.255919873714447, "logits/chosen": 0.7234256863594055, "logits/rejected": 0.6419267654418945, "logps/chosen": -1.7468981742858887, "logps/rejected": -4.300261497497559, "loss": 0.7317, "nll_loss": 0.7061357498168945, "rewards/accuracies": 1.0, "rewards/chosen": -0.17468979954719543, "rewards/margins": 0.25533634424209595, "rewards/rejected": -0.4300261437892914, "step": 1898 }, { "epoch": 5.19917864476386, "grad_norm": 3.1770384311676025, "learning_rate": 7.398630136986301e-07, "log_odds_chosen": 1.2442396879196167, "log_odds_ratio": -0.4019136130809784, "logits/chosen": 0.7649312019348145, "logits/rejected": 0.7122263312339783, "logps/chosen": -2.393040180206299, "logps/rejected": -3.540497303009033, "loss": 0.8146, "nll_loss": 0.7744519710540771, "rewards/accuracies": 0.75, "rewards/chosen": -0.23930403590202332, "rewards/margins": 0.11474570631980896, "rewards/rejected": -0.3540497422218323, "step": 1899 }, { "epoch": 5.201916495550993, "grad_norm": 2.61454701423645, "learning_rate": 7.397260273972602e-07, "log_odds_chosen": 1.7570734024047852, "log_odds_ratio": -0.3432411849498749, "logits/chosen": 0.8372462391853333, "logits/rejected": 0.845214307308197, "logps/chosen": -2.5902233123779297, "logps/rejected": -4.276414394378662, "loss": 0.8066, "nll_loss": 0.77228182554245, "rewards/accuracies": 0.75, "rewards/chosen": -0.2590223550796509, "rewards/margins": 0.1686190962791443, "rewards/rejected": -0.42764145135879517, "step": 1900 }, { "epoch": 5.204654346338125, "grad_norm": 2.4094078540802, "learning_rate": 7.395890410958904e-07, "log_odds_chosen": 1.707202672958374, "log_odds_ratio": -0.36941802501678467, "logits/chosen": 0.6281018257141113, "logits/rejected": 0.5861252546310425, "logps/chosen": -1.8462421894073486, "logps/rejected": -3.4690728187561035, "loss": 0.7758, "nll_loss": 0.7388162612915039, "rewards/accuracies": 0.875, "rewards/chosen": -0.18462422490119934, "rewards/margins": 0.1622830480337143, "rewards/rejected": -0.34690725803375244, "step": 1901 }, { "epoch": 5.207392197125257, "grad_norm": 3.402637481689453, "learning_rate": 7.394520547945205e-07, "log_odds_chosen": 1.3201990127563477, "log_odds_ratio": -0.6613479256629944, "logits/chosen": 0.5344811677932739, "logits/rejected": 0.47130101919174194, "logps/chosen": -1.8178398609161377, "logps/rejected": -2.915090560913086, "loss": 0.829, "nll_loss": 0.7628668546676636, "rewards/accuracies": 0.875, "rewards/chosen": -0.1817840188741684, "rewards/margins": 0.10972507297992706, "rewards/rejected": -0.29150906205177307, "step": 1902 }, { "epoch": 5.210130047912389, "grad_norm": 3.4949471950531006, "learning_rate": 7.393150684931506e-07, "log_odds_chosen": 1.8819478750228882, "log_odds_ratio": -0.5784531831741333, "logits/chosen": 0.9234225153923035, "logits/rejected": 0.9748762249946594, "logps/chosen": -2.6642770767211914, "logps/rejected": -4.531985282897949, "loss": 0.786, "nll_loss": 0.7281261086463928, "rewards/accuracies": 0.5, "rewards/chosen": -0.2664276957511902, "rewards/margins": 0.18677081167697906, "rewards/rejected": -0.45319855213165283, "step": 1903 }, { "epoch": 5.212867898699521, "grad_norm": 3.0627059936523438, "learning_rate": 7.391780821917808e-07, "log_odds_chosen": 2.2206897735595703, "log_odds_ratio": -0.2689782381057739, "logits/chosen": 0.886253833770752, "logits/rejected": 0.8611483573913574, "logps/chosen": -2.4416422843933105, "logps/rejected": -4.569514274597168, "loss": 0.8265, "nll_loss": 0.7996271848678589, "rewards/accuracies": 1.0, "rewards/chosen": -0.24416422843933105, "rewards/margins": 0.21278724074363708, "rewards/rejected": -0.45695146918296814, "step": 1904 }, { "epoch": 5.215605749486653, "grad_norm": 2.8921873569488525, "learning_rate": 7.39041095890411e-07, "log_odds_chosen": 2.304582118988037, "log_odds_ratio": -0.38496729731559753, "logits/chosen": 0.6760198473930359, "logits/rejected": 0.6632392406463623, "logps/chosen": -1.6050622463226318, "logps/rejected": -3.7582554817199707, "loss": 0.8368, "nll_loss": 0.7982824444770813, "rewards/accuracies": 0.625, "rewards/chosen": -0.1605062186717987, "rewards/margins": 0.21531935036182404, "rewards/rejected": -0.37582552433013916, "step": 1905 }, { "epoch": 5.218343600273785, "grad_norm": 2.6341564655303955, "learning_rate": 7.38904109589041e-07, "log_odds_chosen": 1.4197564125061035, "log_odds_ratio": -0.2864343225955963, "logits/chosen": 0.48806390166282654, "logits/rejected": 0.4432811439037323, "logps/chosen": -2.0805091857910156, "logps/rejected": -3.407522678375244, "loss": 0.7494, "nll_loss": 0.7207960486412048, "rewards/accuracies": 1.0, "rewards/chosen": -0.2080509066581726, "rewards/margins": 0.13270136713981628, "rewards/rejected": -0.3407522737979889, "step": 1906 }, { "epoch": 5.221081451060917, "grad_norm": 2.3624320030212402, "learning_rate": 7.387671232876712e-07, "log_odds_chosen": 2.9519155025482178, "log_odds_ratio": -0.13875816762447357, "logits/chosen": 0.910253643989563, "logits/rejected": 0.9094600081443787, "logps/chosen": -2.0518198013305664, "logps/rejected": -4.8194074630737305, "loss": 0.6732, "nll_loss": 0.6593471169471741, "rewards/accuracies": 1.0, "rewards/chosen": -0.20518198609352112, "rewards/margins": 0.27675876021385193, "rewards/rejected": -0.48194074630737305, "step": 1907 }, { "epoch": 5.223819301848049, "grad_norm": 2.6102263927459717, "learning_rate": 7.386301369863014e-07, "log_odds_chosen": 1.6758644580841064, "log_odds_ratio": -0.3102095425128937, "logits/chosen": 0.8097944855690002, "logits/rejected": 0.8226109147071838, "logps/chosen": -2.2122154235839844, "logps/rejected": -3.785737991333008, "loss": 0.79, "nll_loss": 0.7589339017868042, "rewards/accuracies": 0.875, "rewards/chosen": -0.22122152149677277, "rewards/margins": 0.1573522686958313, "rewards/rejected": -0.37857380509376526, "step": 1908 }, { "epoch": 5.2265571526351815, "grad_norm": 3.3607616424560547, "learning_rate": 7.384931506849314e-07, "log_odds_chosen": 1.0427621603012085, "log_odds_ratio": -0.5778732299804688, "logits/chosen": 0.7945204973220825, "logits/rejected": 0.846375584602356, "logps/chosen": -2.7721567153930664, "logps/rejected": -3.6436572074890137, "loss": 0.7969, "nll_loss": 0.7390828132629395, "rewards/accuracies": 0.875, "rewards/chosen": -0.2772156596183777, "rewards/margins": 0.08715005964040756, "rewards/rejected": -0.36436572670936584, "step": 1909 }, { "epoch": 5.229295003422314, "grad_norm": 2.449085235595703, "learning_rate": 7.383561643835616e-07, "log_odds_chosen": 3.5500831604003906, "log_odds_ratio": -0.12664273381233215, "logits/chosen": 0.8306617736816406, "logits/rejected": 0.8336629867553711, "logps/chosen": -1.6981539726257324, "logps/rejected": -5.055572509765625, "loss": 0.6649, "nll_loss": 0.6522620916366577, "rewards/accuracies": 1.0, "rewards/chosen": -0.16981539130210876, "rewards/margins": 0.33574187755584717, "rewards/rejected": -0.5055572390556335, "step": 1910 }, { "epoch": 5.232032854209446, "grad_norm": 2.594902515411377, "learning_rate": 7.382191780821917e-07, "log_odds_chosen": 1.580545425415039, "log_odds_ratio": -0.26299619674682617, "logits/chosen": 0.5755226612091064, "logits/rejected": 0.5259930491447449, "logps/chosen": -1.3855774402618408, "logps/rejected": -2.7572669982910156, "loss": 0.7443, "nll_loss": 0.7179692387580872, "rewards/accuracies": 1.0, "rewards/chosen": -0.13855776190757751, "rewards/margins": 0.13716894388198853, "rewards/rejected": -0.27572670578956604, "step": 1911 }, { "epoch": 5.234770704996578, "grad_norm": 3.998546600341797, "learning_rate": 7.380821917808218e-07, "log_odds_chosen": 2.3569555282592773, "log_odds_ratio": -0.360589861869812, "logits/chosen": 1.112203598022461, "logits/rejected": 1.0927417278289795, "logps/chosen": -2.932305097579956, "logps/rejected": -5.177468299865723, "loss": 0.8226, "nll_loss": 0.7865577340126038, "rewards/accuracies": 0.875, "rewards/chosen": -0.29323050379753113, "rewards/margins": 0.224516361951828, "rewards/rejected": -0.5177468061447144, "step": 1912 }, { "epoch": 5.23750855578371, "grad_norm": 2.1854405403137207, "learning_rate": 7.37945205479452e-07, "log_odds_chosen": 2.5300111770629883, "log_odds_ratio": -0.17105266451835632, "logits/chosen": 0.6586034893989563, "logits/rejected": 0.5801489353179932, "logps/chosen": -1.7913553714752197, "logps/rejected": -4.159875392913818, "loss": 0.7554, "nll_loss": 0.7383100390434265, "rewards/accuracies": 1.0, "rewards/chosen": -0.1791355311870575, "rewards/margins": 0.2368520051240921, "rewards/rejected": -0.4159875512123108, "step": 1913 }, { "epoch": 5.240246406570842, "grad_norm": 9.535361289978027, "learning_rate": 7.378082191780821e-07, "log_odds_chosen": 3.7239909172058105, "log_odds_ratio": -0.5435213446617126, "logits/chosen": 0.9971240758895874, "logits/rejected": 0.9379615783691406, "logps/chosen": -2.290372848510742, "logps/rejected": -5.829583168029785, "loss": 0.7369, "nll_loss": 0.6825635433197021, "rewards/accuracies": 0.875, "rewards/chosen": -0.2290372997522354, "rewards/margins": 0.3539210259914398, "rewards/rejected": -0.5829583406448364, "step": 1914 }, { "epoch": 5.242984257357974, "grad_norm": 2.4812870025634766, "learning_rate": 7.376712328767123e-07, "log_odds_chosen": 1.83390212059021, "log_odds_ratio": -0.30773380398750305, "logits/chosen": 0.9820373058319092, "logits/rejected": 0.9800244569778442, "logps/chosen": -2.1319334506988525, "logps/rejected": -3.8602170944213867, "loss": 0.7575, "nll_loss": 0.7267143726348877, "rewards/accuracies": 1.0, "rewards/chosen": -0.2131933569908142, "rewards/margins": 0.17282839119434357, "rewards/rejected": -0.3860217332839966, "step": 1915 }, { "epoch": 5.245722108145106, "grad_norm": 5.672687530517578, "learning_rate": 7.375342465753424e-07, "log_odds_chosen": 0.40505826473236084, "log_odds_ratio": -0.8188577890396118, "logits/chosen": 0.9107064604759216, "logits/rejected": 0.9289263486862183, "logps/chosen": -3.086338996887207, "logps/rejected": -3.4152841567993164, "loss": 0.8139, "nll_loss": 0.731968879699707, "rewards/accuracies": 0.75, "rewards/chosen": -0.3086338937282562, "rewards/margins": 0.032894525676965714, "rewards/rejected": -0.34152841567993164, "step": 1916 }, { "epoch": 5.248459958932238, "grad_norm": 2.1411054134368896, "learning_rate": 7.373972602739725e-07, "log_odds_chosen": 1.8142696619033813, "log_odds_ratio": -0.23372545838356018, "logits/chosen": 0.9056259393692017, "logits/rejected": 0.8823009133338928, "logps/chosen": -1.7008600234985352, "logps/rejected": -3.2414512634277344, "loss": 0.7385, "nll_loss": 0.7150793075561523, "rewards/accuracies": 0.875, "rewards/chosen": -0.17008599638938904, "rewards/margins": 0.15405914187431335, "rewards/rejected": -0.3241451382637024, "step": 1917 }, { "epoch": 5.25119780971937, "grad_norm": 2.3148868083953857, "learning_rate": 7.372602739726027e-07, "log_odds_chosen": 2.173779010772705, "log_odds_ratio": -0.31945568323135376, "logits/chosen": 0.5068829655647278, "logits/rejected": 0.4623083174228668, "logps/chosen": -1.7453505992889404, "logps/rejected": -3.8235225677490234, "loss": 0.7341, "nll_loss": 0.7021293640136719, "rewards/accuracies": 1.0, "rewards/chosen": -0.17453506588935852, "rewards/margins": 0.2078171968460083, "rewards/rejected": -0.38235223293304443, "step": 1918 }, { "epoch": 5.253935660506502, "grad_norm": 2.2680649757385254, "learning_rate": 7.371232876712329e-07, "log_odds_chosen": 2.176595449447632, "log_odds_ratio": -0.15474431216716766, "logits/chosen": 0.8742311000823975, "logits/rejected": 0.8822520971298218, "logps/chosen": -1.4437153339385986, "logps/rejected": -3.294600009918213, "loss": 0.6969, "nll_loss": 0.6814089417457581, "rewards/accuracies": 1.0, "rewards/chosen": -0.14437153935432434, "rewards/margins": 0.18508845567703247, "rewards/rejected": -0.3294599950313568, "step": 1919 }, { "epoch": 5.256673511293634, "grad_norm": 5.191041946411133, "learning_rate": 7.369863013698629e-07, "log_odds_chosen": 0.6157808303833008, "log_odds_ratio": -0.6051087379455566, "logits/chosen": 0.6377661228179932, "logits/rejected": 0.7003324031829834, "logps/chosen": -2.629917621612549, "logps/rejected": -3.1990444660186768, "loss": 0.768, "nll_loss": 0.7075133323669434, "rewards/accuracies": 0.75, "rewards/chosen": -0.2629917562007904, "rewards/margins": 0.05691269040107727, "rewards/rejected": -0.31990447640419006, "step": 1920 }, { "epoch": 5.259411362080766, "grad_norm": 2.87062931060791, "learning_rate": 7.368493150684931e-07, "log_odds_chosen": 2.809307098388672, "log_odds_ratio": -0.2693895697593689, "logits/chosen": 0.8896329402923584, "logits/rejected": 0.9405727386474609, "logps/chosen": -2.1800436973571777, "logps/rejected": -4.895110130310059, "loss": 0.7971, "nll_loss": 0.7701365947723389, "rewards/accuracies": 0.875, "rewards/chosen": -0.21800439059734344, "rewards/margins": 0.271506667137146, "rewards/rejected": -0.48951107263565063, "step": 1921 }, { "epoch": 5.262149212867898, "grad_norm": 2.999704360961914, "learning_rate": 7.367123287671233e-07, "log_odds_chosen": 1.3512499332427979, "log_odds_ratio": -0.46353593468666077, "logits/chosen": 0.6074192523956299, "logits/rejected": 0.5715470314025879, "logps/chosen": -1.5087801218032837, "logps/rejected": -2.7669811248779297, "loss": 0.7863, "nll_loss": 0.7399712800979614, "rewards/accuracies": 0.875, "rewards/chosen": -0.15087801218032837, "rewards/margins": 0.125820130109787, "rewards/rejected": -0.27669811248779297, "step": 1922 }, { "epoch": 5.2648870636550305, "grad_norm": 3.009568691253662, "learning_rate": 7.365753424657533e-07, "log_odds_chosen": 2.380525827407837, "log_odds_ratio": -0.14345967769622803, "logits/chosen": 0.6360346078872681, "logits/rejected": 0.5981690883636475, "logps/chosen": -2.040992021560669, "logps/rejected": -4.216834545135498, "loss": 0.6961, "nll_loss": 0.6817883253097534, "rewards/accuracies": 1.0, "rewards/chosen": -0.20409920811653137, "rewards/margins": 0.2175842672586441, "rewards/rejected": -0.4216834604740143, "step": 1923 }, { "epoch": 5.2676249144421625, "grad_norm": 3.311014413833618, "learning_rate": 7.364383561643835e-07, "log_odds_chosen": 2.1716256141662598, "log_odds_ratio": -0.3497595191001892, "logits/chosen": 0.9858922958374023, "logits/rejected": 1.021310806274414, "logps/chosen": -2.6996560096740723, "logps/rejected": -4.764933109283447, "loss": 0.8924, "nll_loss": 0.8574115633964539, "rewards/accuracies": 0.75, "rewards/chosen": -0.26996558904647827, "rewards/margins": 0.2065277099609375, "rewards/rejected": -0.47649329900741577, "step": 1924 }, { "epoch": 5.270362765229295, "grad_norm": 2.5397911071777344, "learning_rate": 7.363013698630136e-07, "log_odds_chosen": 1.2168554067611694, "log_odds_ratio": -0.38572442531585693, "logits/chosen": 0.7460162043571472, "logits/rejected": 0.7191258668899536, "logps/chosen": -1.9759489297866821, "logps/rejected": -3.0796165466308594, "loss": 0.7517, "nll_loss": 0.7131584882736206, "rewards/accuracies": 0.75, "rewards/chosen": -0.19759491086006165, "rewards/margins": 0.11036673933267593, "rewards/rejected": -0.307961642742157, "step": 1925 }, { "epoch": 5.273100616016427, "grad_norm": 2.286993980407715, "learning_rate": 7.361643835616437e-07, "log_odds_chosen": 2.4081783294677734, "log_odds_ratio": -0.17424173653125763, "logits/chosen": 0.9742858409881592, "logits/rejected": 0.9934576749801636, "logps/chosen": -2.0445027351379395, "logps/rejected": -4.336664199829102, "loss": 0.6578, "nll_loss": 0.6403695344924927, "rewards/accuracies": 1.0, "rewards/chosen": -0.20445027947425842, "rewards/margins": 0.22921617329120636, "rewards/rejected": -0.433666467666626, "step": 1926 }, { "epoch": 5.27583846680356, "grad_norm": 2.5378355979919434, "learning_rate": 7.360273972602739e-07, "log_odds_chosen": 1.3654392957687378, "log_odds_ratio": -0.290855348110199, "logits/chosen": 0.8386270999908447, "logits/rejected": 0.8415400981903076, "logps/chosen": -1.9171347618103027, "logps/rejected": -3.1687850952148438, "loss": 0.7296, "nll_loss": 0.7004892230033875, "rewards/accuracies": 1.0, "rewards/chosen": -0.19171349704265594, "rewards/margins": 0.12516500055789948, "rewards/rejected": -0.3168784976005554, "step": 1927 }, { "epoch": 5.278576317590692, "grad_norm": 2.6806559562683105, "learning_rate": 7.35890410958904e-07, "log_odds_chosen": 1.768423318862915, "log_odds_ratio": -0.2822344899177551, "logits/chosen": 0.746544361114502, "logits/rejected": 0.6863689422607422, "logps/chosen": -1.5417516231536865, "logps/rejected": -3.1588666439056396, "loss": 0.7927, "nll_loss": 0.7645182609558105, "rewards/accuracies": 1.0, "rewards/chosen": -0.15417516231536865, "rewards/margins": 0.16171151399612427, "rewards/rejected": -0.3158866763114929, "step": 1928 }, { "epoch": 5.281314168377824, "grad_norm": 2.715754985809326, "learning_rate": 7.357534246575342e-07, "log_odds_chosen": 1.7997751235961914, "log_odds_ratio": -0.2936442494392395, "logits/chosen": 0.8549410104751587, "logits/rejected": 0.8478528261184692, "logps/chosen": -2.0193276405334473, "logps/rejected": -3.6858503818511963, "loss": 0.7387, "nll_loss": 0.7093192934989929, "rewards/accuracies": 0.875, "rewards/chosen": -0.20193277299404144, "rewards/margins": 0.16665229201316833, "rewards/rejected": -0.3685850501060486, "step": 1929 }, { "epoch": 5.284052019164956, "grad_norm": 3.4833590984344482, "learning_rate": 7.356164383561643e-07, "log_odds_chosen": 1.8930389881134033, "log_odds_ratio": -0.29639047384262085, "logits/chosen": 0.9865908622741699, "logits/rejected": 1.0073829889297485, "logps/chosen": -2.203556776046753, "logps/rejected": -3.8960506916046143, "loss": 0.7704, "nll_loss": 0.7407239675521851, "rewards/accuracies": 0.875, "rewards/chosen": -0.22035568952560425, "rewards/margins": 0.16924940049648285, "rewards/rejected": -0.3896051049232483, "step": 1930 }, { "epoch": 5.286789869952088, "grad_norm": 3.2722840309143066, "learning_rate": 7.354794520547944e-07, "log_odds_chosen": 2.771669387817383, "log_odds_ratio": -0.13636243343353271, "logits/chosen": 1.029060959815979, "logits/rejected": 1.0879335403442383, "logps/chosen": -2.657179355621338, "logps/rejected": -5.291482448577881, "loss": 0.7235, "nll_loss": 0.7098251581192017, "rewards/accuracies": 1.0, "rewards/chosen": -0.2657179534435272, "rewards/margins": 0.26343029737472534, "rewards/rejected": -0.529148280620575, "step": 1931 }, { "epoch": 5.28952772073922, "grad_norm": 2.508204221725464, "learning_rate": 7.353424657534247e-07, "log_odds_chosen": 2.3305091857910156, "log_odds_ratio": -0.18208327889442444, "logits/chosen": 0.9198340773582458, "logits/rejected": 0.9051928520202637, "logps/chosen": -1.8588850498199463, "logps/rejected": -4.041192054748535, "loss": 0.6966, "nll_loss": 0.6783623695373535, "rewards/accuracies": 1.0, "rewards/chosen": -0.18588849902153015, "rewards/margins": 0.2182307094335556, "rewards/rejected": -0.40411925315856934, "step": 1932 }, { "epoch": 5.292265571526352, "grad_norm": 2.740483522415161, "learning_rate": 7.352054794520547e-07, "log_odds_chosen": 3.0551376342773438, "log_odds_ratio": -0.10433352738618851, "logits/chosen": 1.1576577425003052, "logits/rejected": 1.2160731554031372, "logps/chosen": -2.371751308441162, "logps/rejected": -5.302766799926758, "loss": 0.7154, "nll_loss": 0.7049350738525391, "rewards/accuracies": 1.0, "rewards/chosen": -0.2371751368045807, "rewards/margins": 0.29310157895088196, "rewards/rejected": -0.5302767157554626, "step": 1933 }, { "epoch": 5.295003422313484, "grad_norm": 3.8072962760925293, "learning_rate": 7.350684931506848e-07, "log_odds_chosen": 1.9565907716751099, "log_odds_ratio": -0.4141170382499695, "logits/chosen": 1.0147287845611572, "logits/rejected": 1.0390187501907349, "logps/chosen": -2.478501796722412, "logps/rejected": -4.309947490692139, "loss": 0.7958, "nll_loss": 0.7543957233428955, "rewards/accuracies": 0.75, "rewards/chosen": -0.2478501796722412, "rewards/margins": 0.18314458429813385, "rewards/rejected": -0.43099474906921387, "step": 1934 }, { "epoch": 5.297741273100616, "grad_norm": 2.6929354667663574, "learning_rate": 7.349315068493151e-07, "log_odds_chosen": 3.210631847381592, "log_odds_ratio": -0.1467832773923874, "logits/chosen": 1.219232439994812, "logits/rejected": 1.233077049255371, "logps/chosen": -2.495821952819824, "logps/rejected": -5.628872394561768, "loss": 0.6837, "nll_loss": 0.6690496802330017, "rewards/accuracies": 1.0, "rewards/chosen": -0.2495822012424469, "rewards/margins": 0.3133050799369812, "rewards/rejected": -0.5628872513771057, "step": 1935 }, { "epoch": 5.300479123887748, "grad_norm": 3.768646001815796, "learning_rate": 7.347945205479453e-07, "log_odds_chosen": 1.8972811698913574, "log_odds_ratio": -0.4434157907962799, "logits/chosen": 0.8602045774459839, "logits/rejected": 0.7519694566726685, "logps/chosen": -2.012833595275879, "logps/rejected": -3.788290023803711, "loss": 0.83, "nll_loss": 0.7856922745704651, "rewards/accuracies": 0.75, "rewards/chosen": -0.20128336548805237, "rewards/margins": 0.17754560708999634, "rewards/rejected": -0.3788289427757263, "step": 1936 }, { "epoch": 5.30321697467488, "grad_norm": 2.236299991607666, "learning_rate": 7.346575342465753e-07, "log_odds_chosen": 3.244588613510132, "log_odds_ratio": -0.11945880949497223, "logits/chosen": 0.8600385785102844, "logits/rejected": 0.7840101718902588, "logps/chosen": -1.4719371795654297, "logps/rejected": -4.370847702026367, "loss": 0.699, "nll_loss": 0.6870867013931274, "rewards/accuracies": 1.0, "rewards/chosen": -0.14719372987747192, "rewards/margins": 0.2898910641670227, "rewards/rejected": -0.43708479404449463, "step": 1937 }, { "epoch": 5.305954825462012, "grad_norm": 5.456212043762207, "learning_rate": 7.345205479452055e-07, "log_odds_chosen": 0.8432458639144897, "log_odds_ratio": -0.5631333589553833, "logits/chosen": 0.9425469040870667, "logits/rejected": 0.9150486588478088, "logps/chosen": -2.058199167251587, "logps/rejected": -2.7737784385681152, "loss": 0.756, "nll_loss": 0.6997210383415222, "rewards/accuracies": 0.875, "rewards/chosen": -0.20581990480422974, "rewards/margins": 0.07155793160200119, "rewards/rejected": -0.2773778438568115, "step": 1938 }, { "epoch": 5.308692676249144, "grad_norm": 2.253047466278076, "learning_rate": 7.343835616438357e-07, "log_odds_chosen": 3.927563190460205, "log_odds_ratio": -0.034253329038619995, "logits/chosen": 0.9317804574966431, "logits/rejected": 0.8372564315795898, "logps/chosen": -2.0770413875579834, "logps/rejected": -5.7740325927734375, "loss": 0.7322, "nll_loss": 0.7287455201148987, "rewards/accuracies": 1.0, "rewards/chosen": -0.20770414173603058, "rewards/margins": 0.3696991205215454, "rewards/rejected": -0.57740318775177, "step": 1939 }, { "epoch": 5.3114305270362765, "grad_norm": 3.780547857284546, "learning_rate": 7.342465753424657e-07, "log_odds_chosen": 0.670890212059021, "log_odds_ratio": -0.7524077296257019, "logits/chosen": 0.9935494661331177, "logits/rejected": 1.0728635787963867, "logps/chosen": -2.852421283721924, "logps/rejected": -3.472081422805786, "loss": 0.8908, "nll_loss": 0.8155867457389832, "rewards/accuracies": 0.5, "rewards/chosen": -0.28524211049079895, "rewards/margins": 0.06196601688861847, "rewards/rejected": -0.3472081422805786, "step": 1940 }, { "epoch": 5.314168377823409, "grad_norm": 2.7942404747009277, "learning_rate": 7.341095890410959e-07, "log_odds_chosen": 1.671419620513916, "log_odds_ratio": -0.28603145480155945, "logits/chosen": 0.9401340484619141, "logits/rejected": 0.9763977527618408, "logps/chosen": -2.4901885986328125, "logps/rejected": -4.0714521408081055, "loss": 0.7484, "nll_loss": 0.7197967767715454, "rewards/accuracies": 1.0, "rewards/chosen": -0.24901887774467468, "rewards/margins": 0.15812629461288452, "rewards/rejected": -0.4071452021598816, "step": 1941 }, { "epoch": 5.316906228610541, "grad_norm": 4.893795490264893, "learning_rate": 7.33972602739726e-07, "log_odds_chosen": 0.7326743006706238, "log_odds_ratio": -0.5098870992660522, "logits/chosen": 0.8650693297386169, "logits/rejected": 0.7908636927604675, "logps/chosen": -2.8028974533081055, "logps/rejected": -3.468780994415283, "loss": 0.86, "nll_loss": 0.809012770652771, "rewards/accuracies": 0.875, "rewards/chosen": -0.28028973937034607, "rewards/margins": 0.06658836454153061, "rewards/rejected": -0.3468781113624573, "step": 1942 }, { "epoch": 5.319644079397673, "grad_norm": 3.030815839767456, "learning_rate": 7.338356164383562e-07, "log_odds_chosen": 1.9117913246154785, "log_odds_ratio": -0.36361998319625854, "logits/chosen": 0.8874834179878235, "logits/rejected": 0.9381207823753357, "logps/chosen": -2.327885627746582, "logps/rejected": -4.094750881195068, "loss": 0.7742, "nll_loss": 0.7378489375114441, "rewards/accuracies": 0.875, "rewards/chosen": -0.2327885627746582, "rewards/margins": 0.17668652534484863, "rewards/rejected": -0.40947508811950684, "step": 1943 }, { "epoch": 5.322381930184805, "grad_norm": 2.778747081756592, "learning_rate": 7.336986301369863e-07, "log_odds_chosen": 2.4343276023864746, "log_odds_ratio": -0.29709023237228394, "logits/chosen": 0.735943615436554, "logits/rejected": 0.6825941205024719, "logps/chosen": -2.0673680305480957, "logps/rejected": -4.361354827880859, "loss": 0.7503, "nll_loss": 0.7206387519836426, "rewards/accuracies": 1.0, "rewards/chosen": -0.20673683285713196, "rewards/margins": 0.2293986976146698, "rewards/rejected": -0.43613550066947937, "step": 1944 }, { "epoch": 5.325119780971937, "grad_norm": 2.3983147144317627, "learning_rate": 7.335616438356164e-07, "log_odds_chosen": 1.2090147733688354, "log_odds_ratio": -0.3445817530155182, "logits/chosen": 0.766045093536377, "logits/rejected": 0.7063395380973816, "logps/chosen": -1.7243993282318115, "logps/rejected": -2.8031821250915527, "loss": 0.7485, "nll_loss": 0.7140120267868042, "rewards/accuracies": 1.0, "rewards/chosen": -0.17243993282318115, "rewards/margins": 0.10787828266620636, "rewards/rejected": -0.2803182005882263, "step": 1945 }, { "epoch": 5.327857631759069, "grad_norm": 3.1049964427948, "learning_rate": 7.334246575342466e-07, "log_odds_chosen": 1.381229281425476, "log_odds_ratio": -0.32685211300849915, "logits/chosen": 0.8634268045425415, "logits/rejected": 0.8295353651046753, "logps/chosen": -2.265230417251587, "logps/rejected": -3.555058479309082, "loss": 0.8388, "nll_loss": 0.8061620593070984, "rewards/accuracies": 1.0, "rewards/chosen": -0.2265230417251587, "rewards/margins": 0.12898282706737518, "rewards/rejected": -0.3555058538913727, "step": 1946 }, { "epoch": 5.330595482546201, "grad_norm": 2.7409613132476807, "learning_rate": 7.332876712328767e-07, "log_odds_chosen": 2.4793012142181396, "log_odds_ratio": -0.18997827172279358, "logits/chosen": 0.9009137153625488, "logits/rejected": 0.8739631772041321, "logps/chosen": -1.950965166091919, "logps/rejected": -4.2605977058410645, "loss": 0.7199, "nll_loss": 0.7009043097496033, "rewards/accuracies": 1.0, "rewards/chosen": -0.19509652256965637, "rewards/margins": 0.23096325993537903, "rewards/rejected": -0.426059752702713, "step": 1947 }, { "epoch": 5.333333333333333, "grad_norm": 2.4654324054718018, "learning_rate": 7.331506849315068e-07, "log_odds_chosen": 2.2255423069000244, "log_odds_ratio": -0.35751086473464966, "logits/chosen": 0.7463555335998535, "logits/rejected": 0.7180159091949463, "logps/chosen": -1.833878517150879, "logps/rejected": -3.968626022338867, "loss": 0.7804, "nll_loss": 0.744694709777832, "rewards/accuracies": 0.75, "rewards/chosen": -0.1833878457546234, "rewards/margins": 0.21347475051879883, "rewards/rejected": -0.39686259627342224, "step": 1948 }, { "epoch": 5.336071184120465, "grad_norm": 2.8837530612945557, "learning_rate": 7.33013698630137e-07, "log_odds_chosen": 2.0153133869171143, "log_odds_ratio": -0.5200818181037903, "logits/chosen": 1.008190631866455, "logits/rejected": 0.9987890720367432, "logps/chosen": -2.378340244293213, "logps/rejected": -4.332489967346191, "loss": 0.8186, "nll_loss": 0.766563892364502, "rewards/accuracies": 0.625, "rewards/chosen": -0.23783403635025024, "rewards/margins": 0.1954149603843689, "rewards/rejected": -0.43324902653694153, "step": 1949 }, { "epoch": 5.338809034907597, "grad_norm": 2.920473575592041, "learning_rate": 7.328767123287672e-07, "log_odds_chosen": 1.8949573040008545, "log_odds_ratio": -0.30385953187942505, "logits/chosen": 0.625618577003479, "logits/rejected": 0.6389299631118774, "logps/chosen": -2.201587677001953, "logps/rejected": -3.966491222381592, "loss": 0.7139, "nll_loss": 0.6835095882415771, "rewards/accuracies": 0.75, "rewards/chosen": -0.22015877068042755, "rewards/margins": 0.17649036645889282, "rewards/rejected": -0.39664915204048157, "step": 1950 }, { "epoch": 5.341546885694729, "grad_norm": 2.49984073638916, "learning_rate": 7.327397260273972e-07, "log_odds_chosen": 2.060173511505127, "log_odds_ratio": -0.24952557682991028, "logits/chosen": 0.7642645835876465, "logits/rejected": 0.6823479533195496, "logps/chosen": -1.6000993251800537, "logps/rejected": -3.478248119354248, "loss": 0.8112, "nll_loss": 0.7862603664398193, "rewards/accuracies": 1.0, "rewards/chosen": -0.16000992059707642, "rewards/margins": 0.187814861536026, "rewards/rejected": -0.3478248119354248, "step": 1951 }, { "epoch": 5.344284736481862, "grad_norm": 2.8205273151397705, "learning_rate": 7.326027397260274e-07, "log_odds_chosen": 0.6665676832199097, "log_odds_ratio": -0.4785004258155823, "logits/chosen": 0.6722358465194702, "logits/rejected": 0.6524913311004639, "logps/chosen": -1.7875745296478271, "logps/rejected": -2.3685145378112793, "loss": 0.8344, "nll_loss": 0.7865750193595886, "rewards/accuracies": 0.875, "rewards/chosen": -0.1787574589252472, "rewards/margins": 0.058094002306461334, "rewards/rejected": -0.23685145378112793, "step": 1952 }, { "epoch": 5.347022587268993, "grad_norm": 3.079160451889038, "learning_rate": 7.324657534246576e-07, "log_odds_chosen": 3.6320667266845703, "log_odds_ratio": -0.20705507695674896, "logits/chosen": 0.9490227699279785, "logits/rejected": 0.9571726322174072, "logps/chosen": -2.7457971572875977, "logps/rejected": -6.319643020629883, "loss": 0.7956, "nll_loss": 0.7748450040817261, "rewards/accuracies": 0.875, "rewards/chosen": -0.2745797038078308, "rewards/margins": 0.3573845624923706, "rewards/rejected": -0.6319643259048462, "step": 1953 }, { "epoch": 5.349760438056126, "grad_norm": 2.366124391555786, "learning_rate": 7.323287671232876e-07, "log_odds_chosen": 1.7702181339263916, "log_odds_ratio": -0.2451227754354477, "logits/chosen": 1.0290443897247314, "logits/rejected": 1.0699081420898438, "logps/chosen": -1.9399455785751343, "logps/rejected": -3.5789146423339844, "loss": 0.6996, "nll_loss": 0.6750653982162476, "rewards/accuracies": 1.0, "rewards/chosen": -0.19399458169937134, "rewards/margins": 0.16389691829681396, "rewards/rejected": -0.3578914999961853, "step": 1954 }, { "epoch": 5.352498288843258, "grad_norm": 4.787405014038086, "learning_rate": 7.321917808219178e-07, "log_odds_chosen": 0.6722887754440308, "log_odds_ratio": -0.4687175154685974, "logits/chosen": 0.9414781332015991, "logits/rejected": 0.8580016493797302, "logps/chosen": -2.642935276031494, "logps/rejected": -3.19681453704834, "loss": 0.8015, "nll_loss": 0.7545996904373169, "rewards/accuracies": 0.875, "rewards/chosen": -0.2642935514450073, "rewards/margins": 0.05538792908191681, "rewards/rejected": -0.31968146562576294, "step": 1955 }, { "epoch": 5.3552361396303905, "grad_norm": 4.072346210479736, "learning_rate": 7.320547945205479e-07, "log_odds_chosen": 2.4448459148406982, "log_odds_ratio": -0.4592107832431793, "logits/chosen": 1.3086891174316406, "logits/rejected": 1.3527250289916992, "logps/chosen": -3.3757314682006836, "logps/rejected": -5.703181266784668, "loss": 0.7271, "nll_loss": 0.6812273263931274, "rewards/accuracies": 0.75, "rewards/chosen": -0.33757320046424866, "rewards/margins": 0.23274488747119904, "rewards/rejected": -0.5703181028366089, "step": 1956 }, { "epoch": 5.3579739904175225, "grad_norm": 2.501821517944336, "learning_rate": 7.319178082191781e-07, "log_odds_chosen": 1.390197992324829, "log_odds_ratio": -0.30841055512428284, "logits/chosen": 0.5893000364303589, "logits/rejected": 0.5497415065765381, "logps/chosen": -2.0128066539764404, "logps/rejected": -3.2742083072662354, "loss": 0.7341, "nll_loss": 0.703303337097168, "rewards/accuracies": 0.875, "rewards/chosen": -0.20128066837787628, "rewards/margins": 0.12614016234874725, "rewards/rejected": -0.32742083072662354, "step": 1957 }, { "epoch": 5.360711841204655, "grad_norm": 2.462073564529419, "learning_rate": 7.317808219178082e-07, "log_odds_chosen": 2.5694236755371094, "log_odds_ratio": -0.2122323215007782, "logits/chosen": 0.719364583492279, "logits/rejected": 0.6866661906242371, "logps/chosen": -1.391812801361084, "logps/rejected": -3.7687320709228516, "loss": 0.7683, "nll_loss": 0.7470369338989258, "rewards/accuracies": 1.0, "rewards/chosen": -0.13918128609657288, "rewards/margins": 0.23769190907478333, "rewards/rejected": -0.3768731951713562, "step": 1958 }, { "epoch": 5.363449691991787, "grad_norm": 2.6480350494384766, "learning_rate": 7.316438356164383e-07, "log_odds_chosen": 2.4488070011138916, "log_odds_ratio": -0.24952363967895508, "logits/chosen": 0.765081524848938, "logits/rejected": 0.732974648475647, "logps/chosen": -1.8591067790985107, "logps/rejected": -4.154518127441406, "loss": 0.7402, "nll_loss": 0.7152969837188721, "rewards/accuracies": 0.75, "rewards/chosen": -0.1859106719493866, "rewards/margins": 0.22954116761684418, "rewards/rejected": -0.41545185446739197, "step": 1959 }, { "epoch": 5.366187542778919, "grad_norm": 2.3090038299560547, "learning_rate": 7.315068493150685e-07, "log_odds_chosen": 2.829538583755493, "log_odds_ratio": -0.252733051776886, "logits/chosen": 0.8390366435050964, "logits/rejected": 0.8077381253242493, "logps/chosen": -2.335922956466675, "logps/rejected": -5.071994781494141, "loss": 0.8037, "nll_loss": 0.7784727811813354, "rewards/accuracies": 0.875, "rewards/chosen": -0.23359230160713196, "rewards/margins": 0.27360713481903076, "rewards/rejected": -0.5071994662284851, "step": 1960 }, { "epoch": 5.368925393566051, "grad_norm": 3.0277504920959473, "learning_rate": 7.313698630136986e-07, "log_odds_chosen": 0.5852700471878052, "log_odds_ratio": -0.5224595665931702, "logits/chosen": 0.8602107167243958, "logits/rejected": 0.8032550811767578, "logps/chosen": -2.3319199085235596, "logps/rejected": -2.8562545776367188, "loss": 0.7959, "nll_loss": 0.7436472177505493, "rewards/accuracies": 0.75, "rewards/chosen": -0.23319198191165924, "rewards/margins": 0.05243346095085144, "rewards/rejected": -0.2856254577636719, "step": 1961 }, { "epoch": 5.371663244353183, "grad_norm": 2.5508782863616943, "learning_rate": 7.312328767123287e-07, "log_odds_chosen": 2.3812108039855957, "log_odds_ratio": -0.3060770034790039, "logits/chosen": 1.0236843824386597, "logits/rejected": 1.0471985340118408, "logps/chosen": -1.463333249092102, "logps/rejected": -3.6074838638305664, "loss": 0.785, "nll_loss": 0.7543861269950867, "rewards/accuracies": 1.0, "rewards/chosen": -0.14633332192897797, "rewards/margins": 0.2144150733947754, "rewards/rejected": -0.36074841022491455, "step": 1962 }, { "epoch": 5.374401095140315, "grad_norm": 2.5928242206573486, "learning_rate": 7.310958904109589e-07, "log_odds_chosen": 2.517570734024048, "log_odds_ratio": -0.2644721269607544, "logits/chosen": 1.048514723777771, "logits/rejected": 1.0576459169387817, "logps/chosen": -1.9372395277023315, "logps/rejected": -4.3581767082214355, "loss": 0.6762, "nll_loss": 0.649787962436676, "rewards/accuracies": 1.0, "rewards/chosen": -0.19372394680976868, "rewards/margins": 0.24209371209144592, "rewards/rejected": -0.435817688703537, "step": 1963 }, { "epoch": 5.377138945927447, "grad_norm": 2.719947099685669, "learning_rate": 7.309589041095891e-07, "log_odds_chosen": 1.4313085079193115, "log_odds_ratio": -0.3902883529663086, "logits/chosen": 0.6922914385795593, "logits/rejected": 0.727502703666687, "logps/chosen": -2.149336576461792, "logps/rejected": -3.543459415435791, "loss": 0.7979, "nll_loss": 0.7588233351707458, "rewards/accuracies": 0.875, "rewards/chosen": -0.21493366360664368, "rewards/margins": 0.1394122987985611, "rewards/rejected": -0.35434597730636597, "step": 1964 }, { "epoch": 5.379876796714579, "grad_norm": 3.235708475112915, "learning_rate": 7.308219178082191e-07, "log_odds_chosen": 1.402916431427002, "log_odds_ratio": -0.31370678544044495, "logits/chosen": 0.7898426651954651, "logits/rejected": 0.7042710185050964, "logps/chosen": -1.4159427881240845, "logps/rejected": -2.5857362747192383, "loss": 0.8172, "nll_loss": 0.7858223915100098, "rewards/accuracies": 1.0, "rewards/chosen": -0.1415942907333374, "rewards/margins": 0.11697936058044434, "rewards/rejected": -0.25857365131378174, "step": 1965 }, { "epoch": 5.382614647501711, "grad_norm": 2.70519757270813, "learning_rate": 7.306849315068493e-07, "log_odds_chosen": 3.7437572479248047, "log_odds_ratio": -0.09865467250347137, "logits/chosen": 0.9572830200195312, "logits/rejected": 0.9869809150695801, "logps/chosen": -2.303403854370117, "logps/rejected": -5.9326019287109375, "loss": 0.7724, "nll_loss": 0.7625330090522766, "rewards/accuracies": 1.0, "rewards/chosen": -0.2303403913974762, "rewards/margins": 0.36291977763175964, "rewards/rejected": -0.5932601690292358, "step": 1966 }, { "epoch": 5.385352498288843, "grad_norm": 3.059534788131714, "learning_rate": 7.305479452054795e-07, "log_odds_chosen": 2.825697898864746, "log_odds_ratio": -0.15893520414829254, "logits/chosen": 0.8466119170188904, "logits/rejected": 0.814221978187561, "logps/chosen": -2.4054646492004395, "logps/rejected": -5.124406814575195, "loss": 0.8722, "nll_loss": 0.8563309907913208, "rewards/accuracies": 1.0, "rewards/chosen": -0.24054645001888275, "rewards/margins": 0.271894246339798, "rewards/rejected": -0.5124406814575195, "step": 1967 }, { "epoch": 5.388090349075975, "grad_norm": 3.8141069412231445, "learning_rate": 7.304109589041095e-07, "log_odds_chosen": 1.8480325937271118, "log_odds_ratio": -0.6913058757781982, "logits/chosen": 0.8187546730041504, "logits/rejected": 0.8425841927528381, "logps/chosen": -2.5222957134246826, "logps/rejected": -4.286046504974365, "loss": 0.8747, "nll_loss": 0.8055201768875122, "rewards/accuracies": 0.75, "rewards/chosen": -0.25222960114479065, "rewards/margins": 0.17637507617473602, "rewards/rejected": -0.4286046624183655, "step": 1968 }, { "epoch": 5.390828199863107, "grad_norm": 2.6429860591888428, "learning_rate": 7.302739726027397e-07, "log_odds_chosen": 2.586181163787842, "log_odds_ratio": -0.30386272072792053, "logits/chosen": 0.7617179155349731, "logits/rejected": 0.7309774160385132, "logps/chosen": -1.6912444829940796, "logps/rejected": -4.121814250946045, "loss": 0.7456, "nll_loss": 0.7152242660522461, "rewards/accuracies": 0.875, "rewards/chosen": -0.16912443935871124, "rewards/margins": 0.2430570125579834, "rewards/rejected": -0.41218143701553345, "step": 1969 }, { "epoch": 5.393566050650239, "grad_norm": 2.2784035205841064, "learning_rate": 7.301369863013699e-07, "log_odds_chosen": 3.0429420471191406, "log_odds_ratio": -0.3535492718219757, "logits/chosen": 0.8315854072570801, "logits/rejected": 0.8282468318939209, "logps/chosen": -1.8163096904754639, "logps/rejected": -4.591804027557373, "loss": 0.7748, "nll_loss": 0.7394640445709229, "rewards/accuracies": 0.875, "rewards/chosen": -0.18163099884986877, "rewards/margins": 0.2775494158267975, "rewards/rejected": -0.45918041467666626, "step": 1970 }, { "epoch": 5.3963039014373715, "grad_norm": 2.717475652694702, "learning_rate": 7.3e-07, "log_odds_chosen": 2.2238588333129883, "log_odds_ratio": -0.1288844645023346, "logits/chosen": 1.23997163772583, "logits/rejected": 1.2460427284240723, "logps/chosen": -2.213416576385498, "logps/rejected": -4.284419059753418, "loss": 0.65, "nll_loss": 0.6370837688446045, "rewards/accuracies": 1.0, "rewards/chosen": -0.22134165465831757, "rewards/margins": 0.20710024237632751, "rewards/rejected": -0.4284418821334839, "step": 1971 }, { "epoch": 5.3990417522245036, "grad_norm": 2.789407730102539, "learning_rate": 7.298630136986301e-07, "log_odds_chosen": 2.8699800968170166, "log_odds_ratio": -0.22733065485954285, "logits/chosen": 0.9651943445205688, "logits/rejected": 0.9672830104827881, "logps/chosen": -1.6144402027130127, "logps/rejected": -4.1568827629089355, "loss": 0.6464, "nll_loss": 0.6236515641212463, "rewards/accuracies": 0.875, "rewards/chosen": -0.1614440381526947, "rewards/margins": 0.25424423813819885, "rewards/rejected": -0.41568827629089355, "step": 1972 }, { "epoch": 5.401779603011636, "grad_norm": 3.3185791969299316, "learning_rate": 7.297260273972602e-07, "log_odds_chosen": 1.2138659954071045, "log_odds_ratio": -0.41836822032928467, "logits/chosen": 0.7003427743911743, "logits/rejected": 0.7141348123550415, "logps/chosen": -1.9979474544525146, "logps/rejected": -3.070145845413208, "loss": 0.7045, "nll_loss": 0.6626236438751221, "rewards/accuracies": 0.875, "rewards/chosen": -0.199794739484787, "rewards/margins": 0.10721984505653381, "rewards/rejected": -0.3070145845413208, "step": 1973 }, { "epoch": 5.404517453798768, "grad_norm": 2.8089582920074463, "learning_rate": 7.295890410958904e-07, "log_odds_chosen": 1.6046184301376343, "log_odds_ratio": -0.38695743680000305, "logits/chosen": 0.7543125152587891, "logits/rejected": 0.7942370176315308, "logps/chosen": -1.8758795261383057, "logps/rejected": -3.3369293212890625, "loss": 0.7013, "nll_loss": 0.6625827550888062, "rewards/accuracies": 0.875, "rewards/chosen": -0.18758796155452728, "rewards/margins": 0.14610496163368225, "rewards/rejected": -0.33369290828704834, "step": 1974 }, { "epoch": 5.4072553045859, "grad_norm": 4.0871357917785645, "learning_rate": 7.294520547945205e-07, "log_odds_chosen": 1.9169325828552246, "log_odds_ratio": -0.5373523831367493, "logits/chosen": 0.7521225214004517, "logits/rejected": 0.748436689376831, "logps/chosen": -2.6440083980560303, "logps/rejected": -4.4818315505981445, "loss": 0.8004, "nll_loss": 0.7467145919799805, "rewards/accuracies": 0.75, "rewards/chosen": -0.264400839805603, "rewards/margins": 0.18378227949142456, "rewards/rejected": -0.4481831192970276, "step": 1975 }, { "epoch": 5.409993155373032, "grad_norm": 3.609952688217163, "learning_rate": 7.293150684931506e-07, "log_odds_chosen": 2.203394651412964, "log_odds_ratio": -0.3674221634864807, "logits/chosen": 0.5476335287094116, "logits/rejected": 0.5623046159744263, "logps/chosen": -1.8284357786178589, "logps/rejected": -3.823709487915039, "loss": 0.7696, "nll_loss": 0.7329055666923523, "rewards/accuracies": 0.875, "rewards/chosen": -0.18284356594085693, "rewards/margins": 0.19952738285064697, "rewards/rejected": -0.3823709487915039, "step": 1976 }, { "epoch": 5.412731006160164, "grad_norm": 2.511321544647217, "learning_rate": 7.291780821917808e-07, "log_odds_chosen": 2.7842953205108643, "log_odds_ratio": -0.17963768541812897, "logits/chosen": 0.7405108213424683, "logits/rejected": 0.650447428226471, "logps/chosen": -1.5103143453598022, "logps/rejected": -4.056231498718262, "loss": 0.7615, "nll_loss": 0.7435621023178101, "rewards/accuracies": 1.0, "rewards/chosen": -0.15103143453598022, "rewards/margins": 0.254591703414917, "rewards/rejected": -0.4056231379508972, "step": 1977 }, { "epoch": 5.415468856947296, "grad_norm": 2.1622138023376465, "learning_rate": 7.29041095890411e-07, "log_odds_chosen": 2.3290562629699707, "log_odds_ratio": -0.16180332005023956, "logits/chosen": 0.8411803245544434, "logits/rejected": 0.7785111665725708, "logps/chosen": -1.650067925453186, "logps/rejected": -3.744605779647827, "loss": 0.6483, "nll_loss": 0.632125735282898, "rewards/accuracies": 1.0, "rewards/chosen": -0.16500680148601532, "rewards/margins": 0.2094537913799286, "rewards/rejected": -0.3744605779647827, "step": 1978 }, { "epoch": 5.418206707734429, "grad_norm": 2.5294189453125, "learning_rate": 7.28904109589041e-07, "log_odds_chosen": 4.071109771728516, "log_odds_ratio": -0.2205941379070282, "logits/chosen": 0.8114683628082275, "logits/rejected": 0.7690597176551819, "logps/chosen": -2.3324179649353027, "logps/rejected": -6.296962738037109, "loss": 0.7683, "nll_loss": 0.7461907863616943, "rewards/accuracies": 1.0, "rewards/chosen": -0.23324182629585266, "rewards/margins": 0.39645445346832275, "rewards/rejected": -0.629696249961853, "step": 1979 }, { "epoch": 5.420944558521561, "grad_norm": 2.528789758682251, "learning_rate": 7.287671232876712e-07, "log_odds_chosen": 1.4908430576324463, "log_odds_ratio": -0.2736503481864929, "logits/chosen": 0.8565986752510071, "logits/rejected": 0.8201162815093994, "logps/chosen": -1.8074328899383545, "logps/rejected": -3.1657490730285645, "loss": 0.797, "nll_loss": 0.7696083188056946, "rewards/accuracies": 1.0, "rewards/chosen": -0.1807432770729065, "rewards/margins": 0.13583162426948547, "rewards/rejected": -0.31657493114471436, "step": 1980 }, { "epoch": 5.423682409308693, "grad_norm": 2.2231197357177734, "learning_rate": 7.286301369863014e-07, "log_odds_chosen": 2.273145914077759, "log_odds_ratio": -0.17030423879623413, "logits/chosen": 1.1567738056182861, "logits/rejected": 1.101841688156128, "logps/chosen": -1.4010825157165527, "logps/rejected": -3.433168411254883, "loss": 0.6321, "nll_loss": 0.615077018737793, "rewards/accuracies": 1.0, "rewards/chosen": -0.14010825753211975, "rewards/margins": 0.2032085806131363, "rewards/rejected": -0.34331685304641724, "step": 1981 }, { "epoch": 5.426420260095825, "grad_norm": 2.653568744659424, "learning_rate": 7.284931506849314e-07, "log_odds_chosen": 0.846798300743103, "log_odds_ratio": -0.39804893732070923, "logits/chosen": 1.083404302597046, "logits/rejected": 1.0786333084106445, "logps/chosen": -2.3181982040405273, "logps/rejected": -3.076266288757324, "loss": 0.7369, "nll_loss": 0.6970661282539368, "rewards/accuracies": 1.0, "rewards/chosen": -0.23181983828544617, "rewards/margins": 0.07580681145191193, "rewards/rejected": -0.3076266348361969, "step": 1982 }, { "epoch": 5.429158110882957, "grad_norm": 2.828073501586914, "learning_rate": 7.283561643835616e-07, "log_odds_chosen": 3.8793563842773438, "log_odds_ratio": -0.21303211152553558, "logits/chosen": 0.8765867948532104, "logits/rejected": 0.9216270446777344, "logps/chosen": -2.0167880058288574, "logps/rejected": -5.74008321762085, "loss": 0.7391, "nll_loss": 0.7177960872650146, "rewards/accuracies": 0.875, "rewards/chosen": -0.2016788274049759, "rewards/margins": 0.3723294734954834, "rewards/rejected": -0.5740083456039429, "step": 1983 }, { "epoch": 5.431895961670089, "grad_norm": 2.5426220893859863, "learning_rate": 7.282191780821918e-07, "log_odds_chosen": 1.6577140092849731, "log_odds_ratio": -0.34460288286209106, "logits/chosen": 0.881930410861969, "logits/rejected": 0.8479024767875671, "logps/chosen": -2.0762224197387695, "logps/rejected": -3.6563127040863037, "loss": 0.7257, "nll_loss": 0.6911898255348206, "rewards/accuracies": 0.875, "rewards/chosen": -0.207622230052948, "rewards/margins": 0.15800900757312775, "rewards/rejected": -0.36563125252723694, "step": 1984 }, { "epoch": 5.434633812457221, "grad_norm": 3.9751219749450684, "learning_rate": 7.280821917808219e-07, "log_odds_chosen": 3.4356770515441895, "log_odds_ratio": -0.31664571166038513, "logits/chosen": 1.150858998298645, "logits/rejected": 1.103788137435913, "logps/chosen": -2.090822696685791, "logps/rejected": -5.375247478485107, "loss": 0.765, "nll_loss": 0.7333030104637146, "rewards/accuracies": 0.875, "rewards/chosen": -0.20908226072788239, "rewards/margins": 0.3284425139427185, "rewards/rejected": -0.5375247597694397, "step": 1985 }, { "epoch": 5.437371663244353, "grad_norm": 3.1989402770996094, "learning_rate": 7.27945205479452e-07, "log_odds_chosen": 1.7243516445159912, "log_odds_ratio": -0.39417847990989685, "logits/chosen": 0.845703125, "logits/rejected": 0.865180253982544, "logps/chosen": -1.7363916635513306, "logps/rejected": -3.357877254486084, "loss": 0.8708, "nll_loss": 0.8314201831817627, "rewards/accuracies": 0.75, "rewards/chosen": -0.173639178276062, "rewards/margins": 0.16214853525161743, "rewards/rejected": -0.33578771352767944, "step": 1986 }, { "epoch": 5.4401095140314855, "grad_norm": 2.564688205718994, "learning_rate": 7.278082191780821e-07, "log_odds_chosen": 2.819192886352539, "log_odds_ratio": -0.22702080011367798, "logits/chosen": 0.6646134257316589, "logits/rejected": 0.6749475002288818, "logps/chosen": -2.0315728187561035, "logps/rejected": -4.67767333984375, "loss": 0.6971, "nll_loss": 0.674379825592041, "rewards/accuracies": 1.0, "rewards/chosen": -0.20315727591514587, "rewards/margins": 0.26461008191108704, "rewards/rejected": -0.4677673578262329, "step": 1987 }, { "epoch": 5.4428473648186175, "grad_norm": 4.603559494018555, "learning_rate": 7.276712328767123e-07, "log_odds_chosen": 0.5676331520080566, "log_odds_ratio": -0.7889116406440735, "logits/chosen": 0.7702003717422485, "logits/rejected": 0.7507532835006714, "logps/chosen": -2.9336204528808594, "logps/rejected": -3.47808837890625, "loss": 0.8667, "nll_loss": 0.7877901792526245, "rewards/accuracies": 0.5, "rewards/chosen": -0.293362021446228, "rewards/margins": 0.054446808993816376, "rewards/rejected": -0.347808837890625, "step": 1988 }, { "epoch": 5.44558521560575, "grad_norm": 5.055527687072754, "learning_rate": 7.275342465753424e-07, "log_odds_chosen": 2.1973531246185303, "log_odds_ratio": -0.33771470189094543, "logits/chosen": 1.027974009513855, "logits/rejected": 1.081778645515442, "logps/chosen": -2.489786386489868, "logps/rejected": -4.5648884773254395, "loss": 0.8067, "nll_loss": 0.7729755640029907, "rewards/accuracies": 0.875, "rewards/chosen": -0.2489786446094513, "rewards/margins": 0.20751020312309265, "rewards/rejected": -0.45648884773254395, "step": 1989 }, { "epoch": 5.448323066392882, "grad_norm": 3.8816685676574707, "learning_rate": 7.273972602739725e-07, "log_odds_chosen": 3.315458297729492, "log_odds_ratio": -0.3028164505958557, "logits/chosen": 1.142354965209961, "logits/rejected": 1.1759012937545776, "logps/chosen": -2.715102195739746, "logps/rejected": -5.9498090744018555, "loss": 0.7809, "nll_loss": 0.7505967617034912, "rewards/accuracies": 0.875, "rewards/chosen": -0.2715102434158325, "rewards/margins": 0.32347068190574646, "rewards/rejected": -0.5949808955192566, "step": 1990 }, { "epoch": 5.451060917180014, "grad_norm": 2.477982521057129, "learning_rate": 7.272602739726027e-07, "log_odds_chosen": 1.3247450590133667, "log_odds_ratio": -0.33293893933296204, "logits/chosen": 0.8252696990966797, "logits/rejected": 0.8522413372993469, "logps/chosen": -2.0088818073272705, "logps/rejected": -3.1624021530151367, "loss": 0.7437, "nll_loss": 0.7104062438011169, "rewards/accuracies": 1.0, "rewards/chosen": -0.20088820159435272, "rewards/margins": 0.11535201221704483, "rewards/rejected": -0.31624019145965576, "step": 1991 }, { "epoch": 5.453798767967146, "grad_norm": 3.926809310913086, "learning_rate": 7.271232876712329e-07, "log_odds_chosen": 1.3835302591323853, "log_odds_ratio": -0.3149789571762085, "logits/chosen": 0.7140977382659912, "logits/rejected": 0.7047544121742249, "logps/chosen": -2.0108702182769775, "logps/rejected": -3.2769265174865723, "loss": 0.7944, "nll_loss": 0.7628659605979919, "rewards/accuracies": 0.875, "rewards/chosen": -0.20108702778816223, "rewards/margins": 0.12660565972328186, "rewards/rejected": -0.3276926875114441, "step": 1992 }, { "epoch": 5.456536618754278, "grad_norm": 2.69307279586792, "learning_rate": 7.269863013698629e-07, "log_odds_chosen": 2.1761789321899414, "log_odds_ratio": -0.28245338797569275, "logits/chosen": 0.6103997826576233, "logits/rejected": 0.5586875677108765, "logps/chosen": -1.9725337028503418, "logps/rejected": -4.027294158935547, "loss": 0.7904, "nll_loss": 0.7621871829032898, "rewards/accuracies": 0.875, "rewards/chosen": -0.19725336134433746, "rewards/margins": 0.2054760456085205, "rewards/rejected": -0.40272945165634155, "step": 1993 }, { "epoch": 5.45927446954141, "grad_norm": 3.9505114555358887, "learning_rate": 7.268493150684931e-07, "log_odds_chosen": 1.5741846561431885, "log_odds_ratio": -0.5932447910308838, "logits/chosen": 0.6513184309005737, "logits/rejected": 0.6749756932258606, "logps/chosen": -1.997986912727356, "logps/rejected": -3.3764607906341553, "loss": 0.7858, "nll_loss": 0.7264284491539001, "rewards/accuracies": 0.75, "rewards/chosen": -0.19979868829250336, "rewards/margins": 0.1378474086523056, "rewards/rejected": -0.33764612674713135, "step": 1994 }, { "epoch": 5.462012320328542, "grad_norm": 2.7994332313537598, "learning_rate": 7.267123287671233e-07, "log_odds_chosen": 3.0794572830200195, "log_odds_ratio": -0.1351923942565918, "logits/chosen": 1.2186408042907715, "logits/rejected": 1.2510749101638794, "logps/chosen": -1.884007215499878, "logps/rejected": -4.795713424682617, "loss": 0.6739, "nll_loss": 0.660356879234314, "rewards/accuracies": 1.0, "rewards/chosen": -0.18840071558952332, "rewards/margins": 0.291170597076416, "rewards/rejected": -0.47957131266593933, "step": 1995 }, { "epoch": 5.464750171115674, "grad_norm": 5.704451084136963, "learning_rate": 7.265753424657533e-07, "log_odds_chosen": 0.8575020432472229, "log_odds_ratio": -0.8370047807693481, "logits/chosen": 0.7951002717018127, "logits/rejected": 0.7758700251579285, "logps/chosen": -3.220496416091919, "logps/rejected": -3.9621593952178955, "loss": 0.8211, "nll_loss": 0.7374387979507446, "rewards/accuracies": 0.75, "rewards/chosen": -0.32204964756965637, "rewards/margins": 0.07416632771492004, "rewards/rejected": -0.39621594548225403, "step": 1996 }, { "epoch": 5.467488021902806, "grad_norm": 2.7700703144073486, "learning_rate": 7.264383561643835e-07, "log_odds_chosen": 1.5611263513565063, "log_odds_ratio": -0.36037182807922363, "logits/chosen": 0.6635804176330566, "logits/rejected": 0.5890917181968689, "logps/chosen": -1.861158847808838, "logps/rejected": -3.2665305137634277, "loss": 0.7609, "nll_loss": 0.7248367667198181, "rewards/accuracies": 0.75, "rewards/chosen": -0.18611589074134827, "rewards/margins": 0.14053715765476227, "rewards/rejected": -0.32665306329727173, "step": 1997 }, { "epoch": 5.470225872689938, "grad_norm": 2.456444501876831, "learning_rate": 7.263013698630137e-07, "log_odds_chosen": 3.5383260250091553, "log_odds_ratio": -0.25832709670066833, "logits/chosen": 0.813961923122406, "logits/rejected": 0.8130509853363037, "logps/chosen": -2.207745313644409, "logps/rejected": -5.562145233154297, "loss": 0.7573, "nll_loss": 0.731487512588501, "rewards/accuracies": 1.0, "rewards/chosen": -0.22077453136444092, "rewards/margins": 0.3354399800300598, "rewards/rejected": -0.5562145113945007, "step": 1998 }, { "epoch": 5.47296372347707, "grad_norm": 3.2012197971343994, "learning_rate": 7.261643835616437e-07, "log_odds_chosen": 2.515742301940918, "log_odds_ratio": -0.5181023478507996, "logits/chosen": 0.7579243183135986, "logits/rejected": 0.7386971712112427, "logps/chosen": -2.1706161499023438, "logps/rejected": -4.594766616821289, "loss": 0.9244, "nll_loss": 0.8725424408912659, "rewards/accuracies": 0.75, "rewards/chosen": -0.2170616090297699, "rewards/margins": 0.24241505563259125, "rewards/rejected": -0.45947664976119995, "step": 1999 }, { "epoch": 5.475701574264202, "grad_norm": 2.301530599594116, "learning_rate": 7.260273972602739e-07, "log_odds_chosen": 2.089118480682373, "log_odds_ratio": -0.23835627734661102, "logits/chosen": 0.5754072666168213, "logits/rejected": 0.4187750518321991, "logps/chosen": -1.7020350694656372, "logps/rejected": -3.6147749423980713, "loss": 0.731, "nll_loss": 0.7071484923362732, "rewards/accuracies": 1.0, "rewards/chosen": -0.17020350694656372, "rewards/margins": 0.1912740170955658, "rewards/rejected": -0.36147749423980713, "step": 2000 }, { "epoch": 5.478439425051334, "grad_norm": 2.555318593978882, "learning_rate": 7.258904109589041e-07, "log_odds_chosen": 2.1773793697357178, "log_odds_ratio": -0.34399664402008057, "logits/chosen": 0.6681867837905884, "logits/rejected": 0.6131387948989868, "logps/chosen": -1.5056307315826416, "logps/rejected": -3.4113495349884033, "loss": 0.7822, "nll_loss": 0.7477517127990723, "rewards/accuracies": 0.875, "rewards/chosen": -0.1505630761384964, "rewards/margins": 0.1905718892812729, "rewards/rejected": -0.3411349654197693, "step": 2001 }, { "epoch": 5.4811772758384665, "grad_norm": 3.0119452476501465, "learning_rate": 7.257534246575342e-07, "log_odds_chosen": 1.4612799882888794, "log_odds_ratio": -0.28933650255203247, "logits/chosen": 0.9222072958946228, "logits/rejected": 0.937110185623169, "logps/chosen": -1.6305738687515259, "logps/rejected": -2.932448387145996, "loss": 0.744, "nll_loss": 0.715043842792511, "rewards/accuracies": 1.0, "rewards/chosen": -0.1630573868751526, "rewards/margins": 0.13018743693828583, "rewards/rejected": -0.2932448387145996, "step": 2002 }, { "epoch": 5.4839151266255985, "grad_norm": 2.936614751815796, "learning_rate": 7.256164383561643e-07, "log_odds_chosen": 2.815178871154785, "log_odds_ratio": -0.2737359404563904, "logits/chosen": 0.7649738788604736, "logits/rejected": 0.729606032371521, "logps/chosen": -2.476844310760498, "logps/rejected": -5.210260391235352, "loss": 0.8202, "nll_loss": 0.7927825450897217, "rewards/accuracies": 0.875, "rewards/chosen": -0.24768441915512085, "rewards/margins": 0.2733416259288788, "rewards/rejected": -0.521026074886322, "step": 2003 }, { "epoch": 5.486652977412731, "grad_norm": 2.5219297409057617, "learning_rate": 7.254794520547944e-07, "log_odds_chosen": 2.5113096237182617, "log_odds_ratio": -0.3508523106575012, "logits/chosen": 0.9208735227584839, "logits/rejected": 0.8857812881469727, "logps/chosen": -1.7998969554901123, "logps/rejected": -4.214883804321289, "loss": 0.8303, "nll_loss": 0.7951878309249878, "rewards/accuracies": 0.875, "rewards/chosen": -0.17998969554901123, "rewards/margins": 0.241498664021492, "rewards/rejected": -0.42148834466934204, "step": 2004 }, { "epoch": 5.489390828199863, "grad_norm": 2.363213062286377, "learning_rate": 7.253424657534246e-07, "log_odds_chosen": 1.5037838220596313, "log_odds_ratio": -0.42216917872428894, "logits/chosen": 0.7462013363838196, "logits/rejected": 0.7392593622207642, "logps/chosen": -1.904337763786316, "logps/rejected": -3.2006349563598633, "loss": 0.7541, "nll_loss": 0.7118527293205261, "rewards/accuracies": 0.625, "rewards/chosen": -0.19043377041816711, "rewards/margins": 0.12962974607944489, "rewards/rejected": -0.3200635313987732, "step": 2005 }, { "epoch": 5.492128678986996, "grad_norm": 2.141470193862915, "learning_rate": 7.252054794520547e-07, "log_odds_chosen": 3.0918807983398438, "log_odds_ratio": -0.20301775634288788, "logits/chosen": 0.6686108708381653, "logits/rejected": 0.6886656284332275, "logps/chosen": -1.9051694869995117, "logps/rejected": -4.879580497741699, "loss": 0.7002, "nll_loss": 0.6799300909042358, "rewards/accuracies": 1.0, "rewards/chosen": -0.19051696360111237, "rewards/margins": 0.29744115471839905, "rewards/rejected": -0.48795807361602783, "step": 2006 }, { "epoch": 5.494866529774128, "grad_norm": 2.385523796081543, "learning_rate": 7.250684931506848e-07, "log_odds_chosen": 2.1268177032470703, "log_odds_ratio": -0.2091769129037857, "logits/chosen": 0.9816328287124634, "logits/rejected": 0.9951522350311279, "logps/chosen": -1.8296215534210205, "logps/rejected": -3.7985057830810547, "loss": 0.7647, "nll_loss": 0.7437976598739624, "rewards/accuracies": 1.0, "rewards/chosen": -0.18296214938163757, "rewards/margins": 0.19688843190670013, "rewards/rejected": -0.3798505663871765, "step": 2007 }, { "epoch": 5.49760438056126, "grad_norm": 2.5893094539642334, "learning_rate": 7.24931506849315e-07, "log_odds_chosen": 1.9715170860290527, "log_odds_ratio": -0.24607989192008972, "logits/chosen": 0.4963149130344391, "logits/rejected": 0.4314897358417511, "logps/chosen": -1.4667762517929077, "logps/rejected": -3.2259254455566406, "loss": 0.7405, "nll_loss": 0.7158584594726562, "rewards/accuracies": 0.875, "rewards/chosen": -0.1466776430606842, "rewards/margins": 0.17591488361358643, "rewards/rejected": -0.32259252667427063, "step": 2008 }, { "epoch": 5.500342231348392, "grad_norm": 3.2119967937469482, "learning_rate": 7.247945205479452e-07, "log_odds_chosen": 2.1546688079833984, "log_odds_ratio": -0.29658210277557373, "logits/chosen": 0.6795452833175659, "logits/rejected": 0.7062781453132629, "logps/chosen": -2.564382553100586, "logps/rejected": -4.6299848556518555, "loss": 0.7856, "nll_loss": 0.7559900283813477, "rewards/accuracies": 0.875, "rewards/chosen": -0.2564382553100586, "rewards/margins": 0.20656028389930725, "rewards/rejected": -0.46299853920936584, "step": 2009 }, { "epoch": 5.503080082135524, "grad_norm": 2.4271750450134277, "learning_rate": 7.246575342465752e-07, "log_odds_chosen": 1.291500449180603, "log_odds_ratio": -0.3800223469734192, "logits/chosen": 0.6756230592727661, "logits/rejected": 0.6707536578178406, "logps/chosen": -2.2809934616088867, "logps/rejected": -3.5191664695739746, "loss": 0.7603, "nll_loss": 0.722288191318512, "rewards/accuracies": 0.875, "rewards/chosen": -0.22809936106204987, "rewards/margins": 0.12381728738546371, "rewards/rejected": -0.35191667079925537, "step": 2010 }, { "epoch": 5.505817932922656, "grad_norm": 2.4446563720703125, "learning_rate": 7.245205479452054e-07, "log_odds_chosen": 2.7314765453338623, "log_odds_ratio": -0.16034774482250214, "logits/chosen": 0.7230512499809265, "logits/rejected": 0.6819560527801514, "logps/chosen": -2.1186647415161133, "logps/rejected": -4.7222161293029785, "loss": 0.7653, "nll_loss": 0.749308705329895, "rewards/accuracies": 1.0, "rewards/chosen": -0.21186646819114685, "rewards/margins": 0.260355144739151, "rewards/rejected": -0.47222161293029785, "step": 2011 }, { "epoch": 5.508555783709788, "grad_norm": 2.944401502609253, "learning_rate": 7.243835616438357e-07, "log_odds_chosen": 4.2780561447143555, "log_odds_ratio": -0.184464231133461, "logits/chosen": 0.8419072031974792, "logits/rejected": 0.8446935415267944, "logps/chosen": -2.0586776733398438, "logps/rejected": -6.186840534210205, "loss": 0.8197, "nll_loss": 0.8012871146202087, "rewards/accuracies": 1.0, "rewards/chosen": -0.20586776733398438, "rewards/margins": 0.41281628608703613, "rewards/rejected": -0.6186841130256653, "step": 2012 }, { "epoch": 5.51129363449692, "grad_norm": 4.2295966148376465, "learning_rate": 7.242465753424656e-07, "log_odds_chosen": 1.4735198020935059, "log_odds_ratio": -0.3502674996852875, "logits/chosen": 1.2342894077301025, "logits/rejected": 1.205145239830017, "logps/chosen": -2.5893187522888184, "logps/rejected": -3.9563417434692383, "loss": 0.714, "nll_loss": 0.6789366006851196, "rewards/accuracies": 0.75, "rewards/chosen": -0.2589319348335266, "rewards/margins": 0.13670223951339722, "rewards/rejected": -0.39563414454460144, "step": 2013 }, { "epoch": 5.514031485284052, "grad_norm": 2.5556528568267822, "learning_rate": 7.241095890410958e-07, "log_odds_chosen": 2.758222818374634, "log_odds_ratio": -0.20229990780353546, "logits/chosen": 0.7235107421875, "logits/rejected": 0.6831291317939758, "logps/chosen": -2.3286020755767822, "logps/rejected": -4.972040176391602, "loss": 0.6883, "nll_loss": 0.6680306196212769, "rewards/accuracies": 1.0, "rewards/chosen": -0.23286020755767822, "rewards/margins": 0.264343798160553, "rewards/rejected": -0.4972040057182312, "step": 2014 }, { "epoch": 5.516769336071184, "grad_norm": 3.167147159576416, "learning_rate": 7.239726027397261e-07, "log_odds_chosen": 1.272690773010254, "log_odds_ratio": -0.3969962000846863, "logits/chosen": 0.48527824878692627, "logits/rejected": 0.4262360632419586, "logps/chosen": -2.179124116897583, "logps/rejected": -3.3661298751831055, "loss": 0.7927, "nll_loss": 0.7530353665351868, "rewards/accuracies": 0.875, "rewards/chosen": -0.21791240572929382, "rewards/margins": 0.11870057880878448, "rewards/rejected": -0.3366129994392395, "step": 2015 }, { "epoch": 5.519507186858316, "grad_norm": 5.963730335235596, "learning_rate": 7.238356164383562e-07, "log_odds_chosen": 0.4135701656341553, "log_odds_ratio": -1.1360726356506348, "logits/chosen": 1.0682629346847534, "logits/rejected": 1.0109829902648926, "logps/chosen": -3.158207893371582, "logps/rejected": -3.481954574584961, "loss": 0.8827, "nll_loss": 0.769091784954071, "rewards/accuracies": 0.75, "rewards/chosen": -0.3158207833766937, "rewards/margins": 0.03237465023994446, "rewards/rejected": -0.34819546341896057, "step": 2016 }, { "epoch": 5.522245037645448, "grad_norm": 4.888474464416504, "learning_rate": 7.236986301369863e-07, "log_odds_chosen": 1.7382937669754028, "log_odds_ratio": -0.5205321907997131, "logits/chosen": 1.120788812637329, "logits/rejected": 1.1034269332885742, "logps/chosen": -2.6069469451904297, "logps/rejected": -4.238662242889404, "loss": 0.759, "nll_loss": 0.7069295644760132, "rewards/accuracies": 0.75, "rewards/chosen": -0.2606947124004364, "rewards/margins": 0.16317152976989746, "rewards/rejected": -0.42386624217033386, "step": 2017 }, { "epoch": 5.5249828884325805, "grad_norm": 3.2012577056884766, "learning_rate": 7.235616438356164e-07, "log_odds_chosen": 1.5707720518112183, "log_odds_ratio": -0.3337422013282776, "logits/chosen": 0.5996403098106384, "logits/rejected": 0.5819278359413147, "logps/chosen": -2.395153760910034, "logps/rejected": -3.7663466930389404, "loss": 0.8031, "nll_loss": 0.7697352170944214, "rewards/accuracies": 0.875, "rewards/chosen": -0.23951537907123566, "rewards/margins": 0.13711930811405182, "rewards/rejected": -0.37663471698760986, "step": 2018 }, { "epoch": 5.5277207392197125, "grad_norm": 2.363872528076172, "learning_rate": 7.234246575342466e-07, "log_odds_chosen": 1.9405546188354492, "log_odds_ratio": -0.21526683866977692, "logits/chosen": 0.7813934087753296, "logits/rejected": 0.772141695022583, "logps/chosen": -1.5962822437286377, "logps/rejected": -3.293667793273926, "loss": 0.7324, "nll_loss": 0.7108657360076904, "rewards/accuracies": 1.0, "rewards/chosen": -0.159628227353096, "rewards/margins": 0.16973857581615448, "rewards/rejected": -0.3293668031692505, "step": 2019 }, { "epoch": 5.530458590006845, "grad_norm": 2.819898843765259, "learning_rate": 7.232876712328767e-07, "log_odds_chosen": 2.7538886070251465, "log_odds_ratio": -0.24671080708503723, "logits/chosen": 0.7622681856155396, "logits/rejected": 0.7043939232826233, "logps/chosen": -1.907853364944458, "logps/rejected": -4.514611721038818, "loss": 0.7339, "nll_loss": 0.7091979384422302, "rewards/accuracies": 0.875, "rewards/chosen": -0.19078534841537476, "rewards/margins": 0.260675847530365, "rewards/rejected": -0.4514612555503845, "step": 2020 }, { "epoch": 5.533196440793977, "grad_norm": 2.817014455795288, "learning_rate": 7.231506849315068e-07, "log_odds_chosen": 3.0040106773376465, "log_odds_ratio": -0.13375574350357056, "logits/chosen": 0.860249400138855, "logits/rejected": 0.8745290637016296, "logps/chosen": -1.9344983100891113, "logps/rejected": -4.661346435546875, "loss": 0.7125, "nll_loss": 0.6991491913795471, "rewards/accuracies": 1.0, "rewards/chosen": -0.19344983994960785, "rewards/margins": 0.27268481254577637, "rewards/rejected": -0.4661346673965454, "step": 2021 }, { "epoch": 5.535934291581109, "grad_norm": 3.4853110313415527, "learning_rate": 7.23013698630137e-07, "log_odds_chosen": 1.3199836015701294, "log_odds_ratio": -0.48932719230651855, "logits/chosen": 0.8546568155288696, "logits/rejected": 0.8397937417030334, "logps/chosen": -2.6034107208251953, "logps/rejected": -3.8831257820129395, "loss": 0.8749, "nll_loss": 0.8260114192962646, "rewards/accuracies": 0.75, "rewards/chosen": -0.2603410482406616, "rewards/margins": 0.12797152996063232, "rewards/rejected": -0.38831257820129395, "step": 2022 }, { "epoch": 5.538672142368241, "grad_norm": 3.300618886947632, "learning_rate": 7.228767123287672e-07, "log_odds_chosen": 2.713796615600586, "log_odds_ratio": -0.37289634346961975, "logits/chosen": 1.0628116130828857, "logits/rejected": 1.0516715049743652, "logps/chosen": -2.480821132659912, "logps/rejected": -5.055485725402832, "loss": 0.6861, "nll_loss": 0.6487850546836853, "rewards/accuracies": 0.875, "rewards/chosen": -0.24808210134506226, "rewards/margins": 0.25746649503707886, "rewards/rejected": -0.5055485963821411, "step": 2023 }, { "epoch": 5.541409993155373, "grad_norm": 2.5148587226867676, "learning_rate": 7.227397260273972e-07, "log_odds_chosen": 1.888503074645996, "log_odds_ratio": -0.29474762082099915, "logits/chosen": 0.6218358874320984, "logits/rejected": 0.5407595038414001, "logps/chosen": -1.7212284803390503, "logps/rejected": -3.4754817485809326, "loss": 0.7242, "nll_loss": 0.6947247982025146, "rewards/accuracies": 1.0, "rewards/chosen": -0.17212286591529846, "rewards/margins": 0.17542532086372375, "rewards/rejected": -0.3475481867790222, "step": 2024 }, { "epoch": 5.544147843942505, "grad_norm": 2.3996260166168213, "learning_rate": 7.226027397260274e-07, "log_odds_chosen": 2.0935091972351074, "log_odds_ratio": -0.2608011066913605, "logits/chosen": 0.7367547154426575, "logits/rejected": 0.68323814868927, "logps/chosen": -1.6372486352920532, "logps/rejected": -3.5863587856292725, "loss": 0.7156, "nll_loss": 0.6894754767417908, "rewards/accuracies": 0.875, "rewards/chosen": -0.163724884390831, "rewards/margins": 0.19491101801395416, "rewards/rejected": -0.35863590240478516, "step": 2025 }, { "epoch": 5.546885694729637, "grad_norm": 2.8451809883117676, "learning_rate": 7.224657534246576e-07, "log_odds_chosen": 1.4753122329711914, "log_odds_ratio": -0.27121174335479736, "logits/chosen": 0.8874655961990356, "logits/rejected": 0.8244439363479614, "logps/chosen": -1.422141194343567, "logps/rejected": -2.7045862674713135, "loss": 0.6864, "nll_loss": 0.6592675447463989, "rewards/accuracies": 1.0, "rewards/chosen": -0.1422141194343567, "rewards/margins": 0.12824448943138123, "rewards/rejected": -0.2704586088657379, "step": 2026 }, { "epoch": 5.549623545516769, "grad_norm": 3.157447576522827, "learning_rate": 7.223287671232876e-07, "log_odds_chosen": 1.8685944080352783, "log_odds_ratio": -0.23709863424301147, "logits/chosen": 0.9103027582168579, "logits/rejected": 0.8745304346084595, "logps/chosen": -2.3693861961364746, "logps/rejected": -4.12021541595459, "loss": 0.7226, "nll_loss": 0.6988771557807922, "rewards/accuracies": 1.0, "rewards/chosen": -0.23693862557411194, "rewards/margins": 0.17508287727832794, "rewards/rejected": -0.4120215177536011, "step": 2027 }, { "epoch": 5.552361396303901, "grad_norm": 2.302021026611328, "learning_rate": 7.221917808219178e-07, "log_odds_chosen": 2.2323591709136963, "log_odds_ratio": -0.22726556658744812, "logits/chosen": 0.9731675386428833, "logits/rejected": 0.9369033575057983, "logps/chosen": -1.7841823101043701, "logps/rejected": -3.869570255279541, "loss": 0.7301, "nll_loss": 0.7074058055877686, "rewards/accuracies": 0.875, "rewards/chosen": -0.17841824889183044, "rewards/margins": 0.20853880047798157, "rewards/rejected": -0.386957049369812, "step": 2028 }, { "epoch": 5.555099247091033, "grad_norm": 2.4311683177948, "learning_rate": 7.22054794520548e-07, "log_odds_chosen": 1.3052407503128052, "log_odds_ratio": -0.36600035429000854, "logits/chosen": 0.6205868124961853, "logits/rejected": 0.5544034838676453, "logps/chosen": -1.9930366277694702, "logps/rejected": -3.243175983428955, "loss": 0.7526, "nll_loss": 0.7159548997879028, "rewards/accuracies": 0.875, "rewards/chosen": -0.19930367171764374, "rewards/margins": 0.12501394748687744, "rewards/rejected": -0.3243176341056824, "step": 2029 }, { "epoch": 5.557837097878165, "grad_norm": 2.905184745788574, "learning_rate": 7.219178082191781e-07, "log_odds_chosen": 1.228992223739624, "log_odds_ratio": -0.45919525623321533, "logits/chosen": 0.5848069190979004, "logits/rejected": 0.5377023220062256, "logps/chosen": -2.0535953044891357, "logps/rejected": -3.123873472213745, "loss": 0.7788, "nll_loss": 0.7328429818153381, "rewards/accuracies": 0.875, "rewards/chosen": -0.205359548330307, "rewards/margins": 0.1070278137922287, "rewards/rejected": -0.3123873472213745, "step": 2030 }, { "epoch": 5.560574948665298, "grad_norm": 2.5475802421569824, "learning_rate": 7.217808219178082e-07, "log_odds_chosen": 3.3879401683807373, "log_odds_ratio": -0.0957481786608696, "logits/chosen": 0.7159742116928101, "logits/rejected": 0.6377174258232117, "logps/chosen": -1.8093056678771973, "logps/rejected": -4.97568416595459, "loss": 0.7778, "nll_loss": 0.7682641744613647, "rewards/accuracies": 1.0, "rewards/chosen": -0.18093056976795197, "rewards/margins": 0.3166378438472748, "rewards/rejected": -0.49756842851638794, "step": 2031 }, { "epoch": 5.563312799452429, "grad_norm": 2.890787363052368, "learning_rate": 7.216438356164384e-07, "log_odds_chosen": 0.7393208146095276, "log_odds_ratio": -0.6120381355285645, "logits/chosen": 0.5842776298522949, "logits/rejected": 0.5463407635688782, "logps/chosen": -1.7328559160232544, "logps/rejected": -2.4302968978881836, "loss": 0.7871, "nll_loss": 0.725939929485321, "rewards/accuracies": 0.875, "rewards/chosen": -0.1732856035232544, "rewards/margins": 0.06974409520626068, "rewards/rejected": -0.24302968382835388, "step": 2032 }, { "epoch": 5.566050650239562, "grad_norm": 3.072564125061035, "learning_rate": 7.215068493150685e-07, "log_odds_chosen": 1.6818205118179321, "log_odds_ratio": -0.34775134921073914, "logits/chosen": 0.7576315402984619, "logits/rejected": 0.6219953298568726, "logps/chosen": -2.1635355949401855, "logps/rejected": -3.7299792766571045, "loss": 0.787, "nll_loss": 0.7522493004798889, "rewards/accuracies": 0.875, "rewards/chosen": -0.21635356545448303, "rewards/margins": 0.15664437413215637, "rewards/rejected": -0.372997909784317, "step": 2033 }, { "epoch": 5.568788501026694, "grad_norm": 2.5042123794555664, "learning_rate": 7.213698630136986e-07, "log_odds_chosen": 1.662662386894226, "log_odds_ratio": -0.2690926790237427, "logits/chosen": 0.8089514970779419, "logits/rejected": 0.7138233780860901, "logps/chosen": -2.27659273147583, "logps/rejected": -3.8445420265197754, "loss": 0.7362, "nll_loss": 0.7092989683151245, "rewards/accuracies": 1.0, "rewards/chosen": -0.22765928506851196, "rewards/margins": 0.15679492056369781, "rewards/rejected": -0.3844541907310486, "step": 2034 }, { "epoch": 5.5715263518138265, "grad_norm": 3.8004565238952637, "learning_rate": 7.212328767123287e-07, "log_odds_chosen": 1.3295984268188477, "log_odds_ratio": -0.3613089621067047, "logits/chosen": 0.8816171884536743, "logits/rejected": 0.8528380393981934, "logps/chosen": -2.748567581176758, "logps/rejected": -3.942293882369995, "loss": 0.743, "nll_loss": 0.706890344619751, "rewards/accuracies": 0.875, "rewards/chosen": -0.2748567461967468, "rewards/margins": 0.1193726658821106, "rewards/rejected": -0.3942294120788574, "step": 2035 }, { "epoch": 5.574264202600959, "grad_norm": 4.387291431427002, "learning_rate": 7.210958904109589e-07, "log_odds_chosen": 2.307830333709717, "log_odds_ratio": -0.470723956823349, "logits/chosen": 0.9363079071044922, "logits/rejected": 0.955358624458313, "logps/chosen": -2.537886142730713, "logps/rejected": -4.658441066741943, "loss": 0.7195, "nll_loss": 0.6724363565444946, "rewards/accuracies": 0.75, "rewards/chosen": -0.25378862023353577, "rewards/margins": 0.2120554894208908, "rewards/rejected": -0.4658440947532654, "step": 2036 }, { "epoch": 5.577002053388091, "grad_norm": 2.402177095413208, "learning_rate": 7.209589041095891e-07, "log_odds_chosen": 2.8447322845458984, "log_odds_ratio": -0.28205132484436035, "logits/chosen": 0.636131227016449, "logits/rejected": 0.551639199256897, "logps/chosen": -1.463552713394165, "logps/rejected": -4.093609809875488, "loss": 0.7466, "nll_loss": 0.7184363007545471, "rewards/accuracies": 0.875, "rewards/chosen": -0.1463552713394165, "rewards/margins": 0.26300570368766785, "rewards/rejected": -0.40936100482940674, "step": 2037 }, { "epoch": 5.579739904175223, "grad_norm": 5.349384784698486, "learning_rate": 7.208219178082191e-07, "log_odds_chosen": 2.1952409744262695, "log_odds_ratio": -0.7855197787284851, "logits/chosen": 0.9992352724075317, "logits/rejected": 0.9901034832000732, "logps/chosen": -2.751483917236328, "logps/rejected": -4.766066551208496, "loss": 0.8163, "nll_loss": 0.7377973198890686, "rewards/accuracies": 0.75, "rewards/chosen": -0.2751483619213104, "rewards/margins": 0.20145830512046814, "rewards/rejected": -0.47660666704177856, "step": 2038 }, { "epoch": 5.582477754962355, "grad_norm": 2.493332862854004, "learning_rate": 7.206849315068493e-07, "log_odds_chosen": 2.2571353912353516, "log_odds_ratio": -0.18976591527462006, "logits/chosen": 0.9126960039138794, "logits/rejected": 0.9139356017112732, "logps/chosen": -2.210066080093384, "logps/rejected": -4.348686218261719, "loss": 0.7128, "nll_loss": 0.6938666105270386, "rewards/accuracies": 1.0, "rewards/chosen": -0.2210065871477127, "rewards/margins": 0.21386203169822693, "rewards/rejected": -0.43486863374710083, "step": 2039 }, { "epoch": 5.585215605749487, "grad_norm": 3.0179555416107178, "learning_rate": 7.205479452054795e-07, "log_odds_chosen": 1.9720826148986816, "log_odds_ratio": -0.2332943230867386, "logits/chosen": 0.82830411195755, "logits/rejected": 0.7852393388748169, "logps/chosen": -2.0127573013305664, "logps/rejected": -3.8473732471466064, "loss": 0.7119, "nll_loss": 0.688596785068512, "rewards/accuracies": 1.0, "rewards/chosen": -0.20127572119235992, "rewards/margins": 0.18346160650253296, "rewards/rejected": -0.3847373425960541, "step": 2040 }, { "epoch": 5.587953456536619, "grad_norm": 2.5120699405670166, "learning_rate": 7.204109589041095e-07, "log_odds_chosen": 1.785079836845398, "log_odds_ratio": -0.3603213131427765, "logits/chosen": 0.7501178979873657, "logits/rejected": 0.7032334804534912, "logps/chosen": -1.9882922172546387, "logps/rejected": -3.6901822090148926, "loss": 0.7609, "nll_loss": 0.7249138355255127, "rewards/accuracies": 0.875, "rewards/chosen": -0.19882921874523163, "rewards/margins": 0.1701890379190445, "rewards/rejected": -0.3690182566642761, "step": 2041 }, { "epoch": 5.590691307323751, "grad_norm": 2.4387056827545166, "learning_rate": 7.202739726027397e-07, "log_odds_chosen": 3.500837564468384, "log_odds_ratio": -0.21136265993118286, "logits/chosen": 0.8328543901443481, "logits/rejected": 0.8353688716888428, "logps/chosen": -1.998291254043579, "logps/rejected": -5.391742706298828, "loss": 0.7552, "nll_loss": 0.7340250015258789, "rewards/accuracies": 0.875, "rewards/chosen": -0.1998291164636612, "rewards/margins": 0.33934515714645386, "rewards/rejected": -0.5391743183135986, "step": 2042 }, { "epoch": 5.593429158110883, "grad_norm": 2.3773958683013916, "learning_rate": 7.201369863013699e-07, "log_odds_chosen": 3.1712474822998047, "log_odds_ratio": -0.19154302775859833, "logits/chosen": 1.0178570747375488, "logits/rejected": 1.0178152322769165, "logps/chosen": -2.2456624507904053, "logps/rejected": -5.311705589294434, "loss": 0.6468, "nll_loss": 0.6276103854179382, "rewards/accuracies": 1.0, "rewards/chosen": -0.224566251039505, "rewards/margins": 0.3066043257713318, "rewards/rejected": -0.5311705470085144, "step": 2043 }, { "epoch": 5.596167008898015, "grad_norm": 2.3922572135925293, "learning_rate": 7.2e-07, "log_odds_chosen": 2.776937961578369, "log_odds_ratio": -0.18567603826522827, "logits/chosen": 1.1032078266143799, "logits/rejected": 1.1156671047210693, "logps/chosen": -2.003819704055786, "logps/rejected": -4.597256183624268, "loss": 0.7318, "nll_loss": 0.7132517099380493, "rewards/accuracies": 1.0, "rewards/chosen": -0.20038196444511414, "rewards/margins": 0.25934362411499023, "rewards/rejected": -0.45972558856010437, "step": 2044 }, { "epoch": 5.598904859685147, "grad_norm": 5.164119243621826, "learning_rate": 7.198630136986301e-07, "log_odds_chosen": 2.6292834281921387, "log_odds_ratio": -0.507689356803894, "logits/chosen": 1.0697864294052124, "logits/rejected": 1.0338364839553833, "logps/chosen": -2.5303730964660645, "logps/rejected": -5.015315055847168, "loss": 0.7699, "nll_loss": 0.7191562056541443, "rewards/accuracies": 0.875, "rewards/chosen": -0.25303730368614197, "rewards/margins": 0.2484942376613617, "rewards/rejected": -0.5015315413475037, "step": 2045 }, { "epoch": 5.601642710472279, "grad_norm": 2.9064407348632812, "learning_rate": 7.197260273972603e-07, "log_odds_chosen": 1.7151578664779663, "log_odds_ratio": -0.4219144284725189, "logits/chosen": 0.5580708384513855, "logits/rejected": 0.5020180344581604, "logps/chosen": -1.6883392333984375, "logps/rejected": -3.2216897010803223, "loss": 0.7969, "nll_loss": 0.7547483444213867, "rewards/accuracies": 0.875, "rewards/chosen": -0.1688339114189148, "rewards/margins": 0.1533350646495819, "rewards/rejected": -0.3221690058708191, "step": 2046 }, { "epoch": 5.604380561259411, "grad_norm": 2.812748432159424, "learning_rate": 7.195890410958904e-07, "log_odds_chosen": 0.6162622570991516, "log_odds_ratio": -0.4659236669540405, "logits/chosen": 0.6534740924835205, "logits/rejected": 0.6169320344924927, "logps/chosen": -2.013899087905884, "logps/rejected": -2.5400125980377197, "loss": 0.8439, "nll_loss": 0.7973024845123291, "rewards/accuracies": 0.875, "rewards/chosen": -0.20138989388942719, "rewards/margins": 0.05261136591434479, "rewards/rejected": -0.254001259803772, "step": 2047 }, { "epoch": 5.607118412046543, "grad_norm": 2.372122049331665, "learning_rate": 7.194520547945205e-07, "log_odds_chosen": 2.549128532409668, "log_odds_ratio": -0.26621580123901367, "logits/chosen": 0.8312082886695862, "logits/rejected": 0.7900121808052063, "logps/chosen": -1.6020119190216064, "logps/rejected": -4.001044750213623, "loss": 0.6724, "nll_loss": 0.6458001732826233, "rewards/accuracies": 0.875, "rewards/chosen": -0.16020120680332184, "rewards/margins": 0.2399032860994339, "rewards/rejected": -0.4001045227050781, "step": 2048 }, { "epoch": 5.6098562628336754, "grad_norm": 2.967576026916504, "learning_rate": 7.193150684931506e-07, "log_odds_chosen": 1.79197096824646, "log_odds_ratio": -0.31647804379463196, "logits/chosen": 0.6277461051940918, "logits/rejected": 0.5399499535560608, "logps/chosen": -1.7030080556869507, "logps/rejected": -3.2944717407226562, "loss": 0.7202, "nll_loss": 0.6885777115821838, "rewards/accuracies": 1.0, "rewards/chosen": -0.17030079662799835, "rewards/margins": 0.15914636850357056, "rewards/rejected": -0.3294471800327301, "step": 2049 }, { "epoch": 5.6125941136208075, "grad_norm": 2.8222758769989014, "learning_rate": 7.191780821917808e-07, "log_odds_chosen": 3.565109968185425, "log_odds_ratio": -0.20623353123664856, "logits/chosen": 1.012268304824829, "logits/rejected": 1.0571986436843872, "logps/chosen": -2.485137939453125, "logps/rejected": -5.92298698425293, "loss": 0.7401, "nll_loss": 0.7194676995277405, "rewards/accuracies": 1.0, "rewards/chosen": -0.24851378798484802, "rewards/margins": 0.34378498792648315, "rewards/rejected": -0.5922987461090088, "step": 2050 }, { "epoch": 5.61533196440794, "grad_norm": 2.536113977432251, "learning_rate": 7.19041095890411e-07, "log_odds_chosen": 1.5699678659439087, "log_odds_ratio": -0.27176880836486816, "logits/chosen": 0.8511998653411865, "logits/rejected": 0.8094611763954163, "logps/chosen": -1.6768003702163696, "logps/rejected": -3.0908026695251465, "loss": 0.7259, "nll_loss": 0.6987336874008179, "rewards/accuracies": 1.0, "rewards/chosen": -0.167680025100708, "rewards/margins": 0.1414002776145935, "rewards/rejected": -0.3090803027153015, "step": 2051 }, { "epoch": 5.618069815195072, "grad_norm": 2.475478172302246, "learning_rate": 7.18904109589041e-07, "log_odds_chosen": 2.2038869857788086, "log_odds_ratio": -0.1874089241027832, "logits/chosen": 1.1116242408752441, "logits/rejected": 1.1671241521835327, "logps/chosen": -1.8086693286895752, "logps/rejected": -3.8512985706329346, "loss": 0.663, "nll_loss": 0.6442204117774963, "rewards/accuracies": 1.0, "rewards/chosen": -0.18086692690849304, "rewards/margins": 0.20426292717456818, "rewards/rejected": -0.3851298689842224, "step": 2052 }, { "epoch": 5.620807665982204, "grad_norm": 3.0468616485595703, "learning_rate": 7.187671232876712e-07, "log_odds_chosen": 2.5308446884155273, "log_odds_ratio": -0.3171945810317993, "logits/chosen": 0.9972691535949707, "logits/rejected": 0.9820045232772827, "logps/chosen": -1.8902170658111572, "logps/rejected": -4.222102165222168, "loss": 0.7872, "nll_loss": 0.7554585337638855, "rewards/accuracies": 0.875, "rewards/chosen": -0.18902170658111572, "rewards/margins": 0.23318850994110107, "rewards/rejected": -0.4222102165222168, "step": 2053 }, { "epoch": 5.623545516769336, "grad_norm": 4.994281768798828, "learning_rate": 7.186301369863014e-07, "log_odds_chosen": 1.0643584728240967, "log_odds_ratio": -0.6501815319061279, "logits/chosen": 0.8142653107643127, "logits/rejected": 0.8038749694824219, "logps/chosen": -2.3830268383026123, "logps/rejected": -3.329052686691284, "loss": 0.9113, "nll_loss": 0.8463205695152283, "rewards/accuracies": 0.625, "rewards/chosen": -0.23830266296863556, "rewards/margins": 0.09460259228944778, "rewards/rejected": -0.33290526270866394, "step": 2054 }, { "epoch": 5.626283367556468, "grad_norm": 3.5672519207000732, "learning_rate": 7.184931506849314e-07, "log_odds_chosen": 1.1766773462295532, "log_odds_ratio": -0.6063677072525024, "logits/chosen": 0.694757878780365, "logits/rejected": 0.5963470935821533, "logps/chosen": -2.275763988494873, "logps/rejected": -3.326385021209717, "loss": 0.8502, "nll_loss": 0.7895259261131287, "rewards/accuracies": 0.875, "rewards/chosen": -0.22757640480995178, "rewards/margins": 0.10506211221218109, "rewards/rejected": -0.3326385021209717, "step": 2055 }, { "epoch": 5.6290212183436, "grad_norm": 2.982478380203247, "learning_rate": 7.183561643835616e-07, "log_odds_chosen": 0.6311404705047607, "log_odds_ratio": -0.645899772644043, "logits/chosen": 1.0629366636276245, "logits/rejected": 1.0811874866485596, "logps/chosen": -2.830916404724121, "logps/rejected": -3.461076021194458, "loss": 0.8576, "nll_loss": 0.792993426322937, "rewards/accuracies": 0.625, "rewards/chosen": -0.28309163451194763, "rewards/margins": 0.06301598995923996, "rewards/rejected": -0.3461076021194458, "step": 2056 }, { "epoch": 5.631759069130732, "grad_norm": 3.867934465408325, "learning_rate": 7.182191780821918e-07, "log_odds_chosen": 2.0809803009033203, "log_odds_ratio": -0.438057005405426, "logits/chosen": 1.020117998123169, "logits/rejected": 1.0699571371078491, "logps/chosen": -2.49322247505188, "logps/rejected": -4.430951118469238, "loss": 0.6906, "nll_loss": 0.6468374729156494, "rewards/accuracies": 0.875, "rewards/chosen": -0.24932223558425903, "rewards/margins": 0.19377291202545166, "rewards/rejected": -0.4430951476097107, "step": 2057 }, { "epoch": 5.634496919917865, "grad_norm": 5.055258274078369, "learning_rate": 7.180821917808219e-07, "log_odds_chosen": 2.147388458251953, "log_odds_ratio": -0.45000797510147095, "logits/chosen": 0.9458827972412109, "logits/rejected": 0.8904780745506287, "logps/chosen": -2.1752872467041016, "logps/rejected": -4.189381122589111, "loss": 0.8503, "nll_loss": 0.805345356464386, "rewards/accuracies": 0.75, "rewards/chosen": -0.21752870082855225, "rewards/margins": 0.20140941441059113, "rewards/rejected": -0.41893815994262695, "step": 2058 }, { "epoch": 5.637234770704996, "grad_norm": 2.3990485668182373, "learning_rate": 7.17945205479452e-07, "log_odds_chosen": 1.8910589218139648, "log_odds_ratio": -0.44724586606025696, "logits/chosen": 0.6582704782485962, "logits/rejected": 0.5889644026756287, "logps/chosen": -2.106975555419922, "logps/rejected": -3.970755100250244, "loss": 0.8301, "nll_loss": 0.7854225039482117, "rewards/accuracies": 0.75, "rewards/chosen": -0.21069753170013428, "rewards/margins": 0.18637797236442566, "rewards/rejected": -0.3970755338668823, "step": 2059 }, { "epoch": 5.639972621492129, "grad_norm": 7.558022975921631, "learning_rate": 7.178082191780822e-07, "log_odds_chosen": 1.5768897533416748, "log_odds_ratio": -0.42846179008483887, "logits/chosen": 1.2155044078826904, "logits/rejected": 1.2154247760772705, "logps/chosen": -2.811694622039795, "logps/rejected": -4.253015041351318, "loss": 0.7133, "nll_loss": 0.6704061627388, "rewards/accuracies": 0.75, "rewards/chosen": -0.28116947412490845, "rewards/margins": 0.14413204789161682, "rewards/rejected": -0.42530152201652527, "step": 2060 }, { "epoch": 5.642710472279261, "grad_norm": 2.4463794231414795, "learning_rate": 7.176712328767123e-07, "log_odds_chosen": 2.1844592094421387, "log_odds_ratio": -0.25430434942245483, "logits/chosen": 0.6444292068481445, "logits/rejected": 0.6226785182952881, "logps/chosen": -1.6808357238769531, "logps/rejected": -3.6591732501983643, "loss": 0.7176, "nll_loss": 0.6921303272247314, "rewards/accuracies": 0.875, "rewards/chosen": -0.1680835634469986, "rewards/margins": 0.19783376157283783, "rewards/rejected": -0.3659173250198364, "step": 2061 }, { "epoch": 5.645448323066393, "grad_norm": 2.74676775932312, "learning_rate": 7.175342465753424e-07, "log_odds_chosen": 2.169255256652832, "log_odds_ratio": -0.23534858226776123, "logits/chosen": 1.095169186592102, "logits/rejected": 1.1017459630966187, "logps/chosen": -2.421165943145752, "logps/rejected": -4.516971111297607, "loss": 0.6688, "nll_loss": 0.6453052163124084, "rewards/accuracies": 1.0, "rewards/chosen": -0.24211660027503967, "rewards/margins": 0.20958054065704346, "rewards/rejected": -0.45169714093208313, "step": 2062 }, { "epoch": 5.648186173853525, "grad_norm": 2.6374101638793945, "learning_rate": 7.173972602739725e-07, "log_odds_chosen": 2.0237865447998047, "log_odds_ratio": -0.184249609708786, "logits/chosen": 0.9438982605934143, "logits/rejected": 0.9205899834632874, "logps/chosen": -1.6322963237762451, "logps/rejected": -3.4039957523345947, "loss": 0.6722, "nll_loss": 0.6537373065948486, "rewards/accuracies": 1.0, "rewards/chosen": -0.16322964429855347, "rewards/margins": 0.17716993391513824, "rewards/rejected": -0.3403995633125305, "step": 2063 }, { "epoch": 5.650924024640657, "grad_norm": 2.718147039413452, "learning_rate": 7.172602739726027e-07, "log_odds_chosen": 2.671128034591675, "log_odds_ratio": -0.25128185749053955, "logits/chosen": 0.9394369125366211, "logits/rejected": 0.9254952669143677, "logps/chosen": -2.160768508911133, "logps/rejected": -4.671147346496582, "loss": 0.797, "nll_loss": 0.7718441486358643, "rewards/accuracies": 1.0, "rewards/chosen": -0.21607686579227448, "rewards/margins": 0.2510378658771515, "rewards/rejected": -0.46711474657058716, "step": 2064 }, { "epoch": 5.653661875427789, "grad_norm": 2.3661015033721924, "learning_rate": 7.171232876712329e-07, "log_odds_chosen": 2.6277170181274414, "log_odds_ratio": -0.19220158457756042, "logits/chosen": 0.8916100263595581, "logits/rejected": 0.8368031978607178, "logps/chosen": -1.8496639728546143, "logps/rejected": -4.31777811050415, "loss": 0.6742, "nll_loss": 0.6550116539001465, "rewards/accuracies": 1.0, "rewards/chosen": -0.18496640026569366, "rewards/margins": 0.2468114197254181, "rewards/rejected": -0.43177780508995056, "step": 2065 }, { "epoch": 5.6563997262149215, "grad_norm": 5.555394649505615, "learning_rate": 7.169863013698629e-07, "log_odds_chosen": 1.085456371307373, "log_odds_ratio": -0.46551573276519775, "logits/chosen": 0.9791654348373413, "logits/rejected": 0.9740554094314575, "logps/chosen": -2.798741340637207, "logps/rejected": -3.8414485454559326, "loss": 0.7735, "nll_loss": 0.7269410490989685, "rewards/accuracies": 0.875, "rewards/chosen": -0.27987414598464966, "rewards/margins": 0.10427073389291763, "rewards/rejected": -0.3841448426246643, "step": 2066 }, { "epoch": 5.6591375770020536, "grad_norm": 3.610123634338379, "learning_rate": 7.168493150684931e-07, "log_odds_chosen": 2.321678876876831, "log_odds_ratio": -0.39797651767730713, "logits/chosen": 1.0684800148010254, "logits/rejected": 1.0579566955566406, "logps/chosen": -2.280275344848633, "logps/rejected": -4.39277458190918, "loss": 0.6853, "nll_loss": 0.6454871892929077, "rewards/accuracies": 0.875, "rewards/chosen": -0.22802752256393433, "rewards/margins": 0.2112499177455902, "rewards/rejected": -0.43927741050720215, "step": 2067 }, { "epoch": 5.661875427789186, "grad_norm": 2.4458274841308594, "learning_rate": 7.167123287671233e-07, "log_odds_chosen": 3.822035789489746, "log_odds_ratio": -0.13037633895874023, "logits/chosen": 1.1298919916152954, "logits/rejected": 1.1685349941253662, "logps/chosen": -2.321061611175537, "logps/rejected": -6.045823097229004, "loss": 0.7027, "nll_loss": 0.6896583437919617, "rewards/accuracies": 0.875, "rewards/chosen": -0.23210617899894714, "rewards/margins": 0.37247616052627563, "rewards/rejected": -0.6045823097229004, "step": 2068 }, { "epoch": 5.664613278576318, "grad_norm": 2.494697093963623, "learning_rate": 7.165753424657533e-07, "log_odds_chosen": 3.5292069911956787, "log_odds_ratio": -0.18095479905605316, "logits/chosen": 0.7394188642501831, "logits/rejected": 0.6454358696937561, "logps/chosen": -2.211620569229126, "logps/rejected": -5.597861289978027, "loss": 0.7319, "nll_loss": 0.7138215899467468, "rewards/accuracies": 0.875, "rewards/chosen": -0.22116206586360931, "rewards/margins": 0.33862408995628357, "rewards/rejected": -0.5597861409187317, "step": 2069 }, { "epoch": 5.66735112936345, "grad_norm": 4.314507484436035, "learning_rate": 7.164383561643835e-07, "log_odds_chosen": 1.3275303840637207, "log_odds_ratio": -0.38961848616600037, "logits/chosen": 0.7161062955856323, "logits/rejected": 0.7221578359603882, "logps/chosen": -2.729424476623535, "logps/rejected": -3.9217724800109863, "loss": 0.7929, "nll_loss": 0.7539615035057068, "rewards/accuracies": 0.625, "rewards/chosen": -0.272942453622818, "rewards/margins": 0.11923478543758392, "rewards/rejected": -0.3921772539615631, "step": 2070 }, { "epoch": 5.670088980150582, "grad_norm": 2.9497861862182617, "learning_rate": 7.163013698630137e-07, "log_odds_chosen": 1.4182460308074951, "log_odds_ratio": -0.33103907108306885, "logits/chosen": 0.744488000869751, "logits/rejected": 0.6683045625686646, "logps/chosen": -2.0398943424224854, "logps/rejected": -3.3059959411621094, "loss": 0.7722, "nll_loss": 0.7390663623809814, "rewards/accuracies": 0.875, "rewards/chosen": -0.2039894312620163, "rewards/margins": 0.1266101747751236, "rewards/rejected": -0.3305996060371399, "step": 2071 }, { "epoch": 5.672826830937714, "grad_norm": 2.701115131378174, "learning_rate": 7.161643835616437e-07, "log_odds_chosen": 2.026980400085449, "log_odds_ratio": -0.36902403831481934, "logits/chosen": 0.725299596786499, "logits/rejected": 0.7320023775100708, "logps/chosen": -2.4253578186035156, "logps/rejected": -4.382235050201416, "loss": 0.8595, "nll_loss": 0.8226062059402466, "rewards/accuracies": 0.875, "rewards/chosen": -0.2425357848405838, "rewards/margins": 0.19568774104118347, "rewards/rejected": -0.4382235109806061, "step": 2072 }, { "epoch": 5.675564681724846, "grad_norm": 2.6408121585845947, "learning_rate": 7.160273972602739e-07, "log_odds_chosen": 2.1854326725006104, "log_odds_ratio": -0.15790945291519165, "logits/chosen": 0.6827605962753296, "logits/rejected": 0.6764068007469177, "logps/chosen": -1.631685733795166, "logps/rejected": -3.6091315746307373, "loss": 0.7777, "nll_loss": 0.7619497179985046, "rewards/accuracies": 1.0, "rewards/chosen": -0.16316857933998108, "rewards/margins": 0.19774457812309265, "rewards/rejected": -0.3609131872653961, "step": 2073 }, { "epoch": 5.678302532511978, "grad_norm": 2.556302070617676, "learning_rate": 7.158904109589041e-07, "log_odds_chosen": 2.871948719024658, "log_odds_ratio": -0.1926698237657547, "logits/chosen": 0.7454776763916016, "logits/rejected": 0.7636620998382568, "logps/chosen": -2.6292991638183594, "logps/rejected": -5.315613269805908, "loss": 0.7318, "nll_loss": 0.7125037908554077, "rewards/accuracies": 0.875, "rewards/chosen": -0.26292991638183594, "rewards/margins": 0.2686313986778259, "rewards/rejected": -0.5315613746643066, "step": 2074 }, { "epoch": 5.68104038329911, "grad_norm": 4.2065629959106445, "learning_rate": 7.157534246575342e-07, "log_odds_chosen": 1.1809957027435303, "log_odds_ratio": -0.46332332491874695, "logits/chosen": 0.796514630317688, "logits/rejected": 0.7659944295883179, "logps/chosen": -2.169834852218628, "logps/rejected": -3.232541084289551, "loss": 0.7316, "nll_loss": 0.6853177547454834, "rewards/accuracies": 0.875, "rewards/chosen": -0.21698349714279175, "rewards/margins": 0.10627063363790512, "rewards/rejected": -0.32325413823127747, "step": 2075 }, { "epoch": 5.683778234086242, "grad_norm": 2.3871662616729736, "learning_rate": 7.156164383561643e-07, "log_odds_chosen": 2.6547951698303223, "log_odds_ratio": -0.20539504289627075, "logits/chosen": 1.0096073150634766, "logits/rejected": 1.0293352603912354, "logps/chosen": -2.1985483169555664, "logps/rejected": -4.777097702026367, "loss": 0.6618, "nll_loss": 0.6412606239318848, "rewards/accuracies": 0.875, "rewards/chosen": -0.21985483169555664, "rewards/margins": 0.2578549385070801, "rewards/rejected": -0.4777097702026367, "step": 2076 }, { "epoch": 5.686516084873374, "grad_norm": 3.0056002140045166, "learning_rate": 7.154794520547945e-07, "log_odds_chosen": 1.7185568809509277, "log_odds_ratio": -0.43151065707206726, "logits/chosen": 0.8052322864532471, "logits/rejected": 0.8254503607749939, "logps/chosen": -2.2315146923065186, "logps/rejected": -3.8149631023406982, "loss": 0.7401, "nll_loss": 0.696934163570404, "rewards/accuracies": 0.75, "rewards/chosen": -0.22315144538879395, "rewards/margins": 0.15834486484527588, "rewards/rejected": -0.3814963102340698, "step": 2077 }, { "epoch": 5.689253935660506, "grad_norm": 2.5818819999694824, "learning_rate": 7.153424657534246e-07, "log_odds_chosen": 2.6327672004699707, "log_odds_ratio": -0.15095584094524384, "logits/chosen": 0.9096264243125916, "logits/rejected": 0.8793336749076843, "logps/chosen": -2.3046674728393555, "logps/rejected": -4.796314239501953, "loss": 0.6948, "nll_loss": 0.6797100305557251, "rewards/accuracies": 1.0, "rewards/chosen": -0.23046673834323883, "rewards/margins": 0.24916471540927887, "rewards/rejected": -0.4796314835548401, "step": 2078 }, { "epoch": 5.691991786447638, "grad_norm": 4.450657844543457, "learning_rate": 7.152054794520548e-07, "log_odds_chosen": 2.494591236114502, "log_odds_ratio": -0.3652329742908478, "logits/chosen": 0.9720720052719116, "logits/rejected": 0.9448798298835754, "logps/chosen": -2.417757272720337, "logps/rejected": -4.8065948486328125, "loss": 0.7762, "nll_loss": 0.7396699786186218, "rewards/accuracies": 0.75, "rewards/chosen": -0.2417757213115692, "rewards/margins": 0.23888376355171204, "rewards/rejected": -0.48065948486328125, "step": 2079 }, { "epoch": 5.69472963723477, "grad_norm": 2.631739854812622, "learning_rate": 7.150684931506848e-07, "log_odds_chosen": 2.3379688262939453, "log_odds_ratio": -0.33136242628097534, "logits/chosen": 0.8574215173721313, "logits/rejected": 0.8469475507736206, "logps/chosen": -1.9569690227508545, "logps/rejected": -4.166290760040283, "loss": 0.7777, "nll_loss": 0.7445736527442932, "rewards/accuracies": 0.875, "rewards/chosen": -0.1956968903541565, "rewards/margins": 0.22093218564987183, "rewards/rejected": -0.4166291058063507, "step": 2080 }, { "epoch": 5.6974674880219025, "grad_norm": 3.841810941696167, "learning_rate": 7.14931506849315e-07, "log_odds_chosen": 2.5081229209899902, "log_odds_ratio": -0.4015842378139496, "logits/chosen": 0.7033872008323669, "logits/rejected": 0.6235479116439819, "logps/chosen": -2.0423383712768555, "logps/rejected": -4.426140785217285, "loss": 0.8081, "nll_loss": 0.767916738986969, "rewards/accuracies": 0.75, "rewards/chosen": -0.20423384010791779, "rewards/margins": 0.23838025331497192, "rewards/rejected": -0.4426140785217285, "step": 2081 }, { "epoch": 5.700205338809035, "grad_norm": 3.6551241874694824, "learning_rate": 7.147945205479452e-07, "log_odds_chosen": 1.6610491275787354, "log_odds_ratio": -0.6477689743041992, "logits/chosen": 0.9186504483222961, "logits/rejected": 0.9474391937255859, "logps/chosen": -2.6684658527374268, "logps/rejected": -4.209917068481445, "loss": 0.8735, "nll_loss": 0.8086850047111511, "rewards/accuracies": 0.625, "rewards/chosen": -0.266846626996994, "rewards/margins": 0.15414512157440186, "rewards/rejected": -0.4209917187690735, "step": 2082 }, { "epoch": 5.702943189596167, "grad_norm": 2.5787792205810547, "learning_rate": 7.146575342465752e-07, "log_odds_chosen": 1.571291446685791, "log_odds_ratio": -0.31370460987091064, "logits/chosen": 0.7176737189292908, "logits/rejected": 0.6634782552719116, "logps/chosen": -1.8976811170578003, "logps/rejected": -3.364112138748169, "loss": 0.7775, "nll_loss": 0.7460984587669373, "rewards/accuracies": 1.0, "rewards/chosen": -0.18976810574531555, "rewards/margins": 0.14664310216903687, "rewards/rejected": -0.3364112377166748, "step": 2083 }, { "epoch": 5.705681040383299, "grad_norm": 3.3127310276031494, "learning_rate": 7.145205479452054e-07, "log_odds_chosen": 0.421078085899353, "log_odds_ratio": -0.548975944519043, "logits/chosen": 0.8250217437744141, "logits/rejected": 0.7870559096336365, "logps/chosen": -2.1489102840423584, "logps/rejected": -2.471771717071533, "loss": 0.7833, "nll_loss": 0.7284428477287292, "rewards/accuracies": 0.625, "rewards/chosen": -0.21489101648330688, "rewards/margins": 0.0322861447930336, "rewards/rejected": -0.24717718362808228, "step": 2084 }, { "epoch": 5.708418891170432, "grad_norm": 4.212541103363037, "learning_rate": 7.143835616438356e-07, "log_odds_chosen": 1.4834609031677246, "log_odds_ratio": -0.38380348682403564, "logits/chosen": 0.8682652711868286, "logits/rejected": 0.9108884334564209, "logps/chosen": -2.258016586303711, "logps/rejected": -3.57704496383667, "loss": 0.7147, "nll_loss": 0.6763487458229065, "rewards/accuracies": 0.75, "rewards/chosen": -0.22580167651176453, "rewards/margins": 0.13190285861492157, "rewards/rejected": -0.3577044904232025, "step": 2085 }, { "epoch": 5.711156741957563, "grad_norm": 10.642485618591309, "learning_rate": 7.142465753424656e-07, "log_odds_chosen": 3.4221787452697754, "log_odds_ratio": -0.9872776865959167, "logits/chosen": 1.4323650598526, "logits/rejected": 1.4506094455718994, "logps/chosen": -3.3291876316070557, "logps/rejected": -6.686227321624756, "loss": 0.8465, "nll_loss": 0.747795045375824, "rewards/accuracies": 0.75, "rewards/chosen": -0.33291876316070557, "rewards/margins": 0.3357039988040924, "rewards/rejected": -0.6686227321624756, "step": 2086 }, { "epoch": 5.713894592744696, "grad_norm": 4.885412216186523, "learning_rate": 7.141095890410958e-07, "log_odds_chosen": 1.0617130994796753, "log_odds_ratio": -0.4694657623767853, "logits/chosen": 0.6637778878211975, "logits/rejected": 0.6319299936294556, "logps/chosen": -1.9021720886230469, "logps/rejected": -2.803870439529419, "loss": 0.7549, "nll_loss": 0.7079644799232483, "rewards/accuracies": 0.75, "rewards/chosen": -0.19021719694137573, "rewards/margins": 0.09016985446214676, "rewards/rejected": -0.2803870439529419, "step": 2087 }, { "epoch": 5.716632443531828, "grad_norm": 4.465142726898193, "learning_rate": 7.13972602739726e-07, "log_odds_chosen": 1.7450374364852905, "log_odds_ratio": -0.3756082057952881, "logits/chosen": 0.8817818760871887, "logits/rejected": 0.8057714700698853, "logps/chosen": -2.327357530593872, "logps/rejected": -3.9274978637695312, "loss": 0.8125, "nll_loss": 0.7749506831169128, "rewards/accuracies": 0.875, "rewards/chosen": -0.2327357530593872, "rewards/margins": 0.1600140482187271, "rewards/rejected": -0.3927498161792755, "step": 2088 }, { "epoch": 5.71937029431896, "grad_norm": 2.7904820442199707, "learning_rate": 7.138356164383561e-07, "log_odds_chosen": 1.7367939949035645, "log_odds_ratio": -0.23510749638080597, "logits/chosen": 0.7910020351409912, "logits/rejected": 0.734438419342041, "logps/chosen": -2.414480447769165, "logps/rejected": -4.048636436462402, "loss": 0.8117, "nll_loss": 0.7882188558578491, "rewards/accuracies": 1.0, "rewards/chosen": -0.2414480745792389, "rewards/margins": 0.16341561079025269, "rewards/rejected": -0.4048636555671692, "step": 2089 }, { "epoch": 5.722108145106092, "grad_norm": 3.9032716751098633, "learning_rate": 7.136986301369862e-07, "log_odds_chosen": 0.7659632563591003, "log_odds_ratio": -0.5689340829849243, "logits/chosen": 0.5007704496383667, "logits/rejected": 0.4462134540081024, "logps/chosen": -2.3796496391296387, "logps/rejected": -3.033146381378174, "loss": 0.838, "nll_loss": 0.7811393141746521, "rewards/accuracies": 0.75, "rewards/chosen": -0.23796497285366058, "rewards/margins": 0.06534967571496964, "rewards/rejected": -0.3033146858215332, "step": 2090 }, { "epoch": 5.724845995893224, "grad_norm": 2.850548505783081, "learning_rate": 7.135616438356164e-07, "log_odds_chosen": 1.9575104713439941, "log_odds_ratio": -0.35508933663368225, "logits/chosen": 0.7435752153396606, "logits/rejected": 0.7848589420318604, "logps/chosen": -2.4021801948547363, "logps/rejected": -4.2649407386779785, "loss": 0.7957, "nll_loss": 0.760182797908783, "rewards/accuracies": 0.875, "rewards/chosen": -0.24021804332733154, "rewards/margins": 0.18627606332302094, "rewards/rejected": -0.4264940917491913, "step": 2091 }, { "epoch": 5.727583846680356, "grad_norm": 4.850385665893555, "learning_rate": 7.134246575342465e-07, "log_odds_chosen": 1.0390347242355347, "log_odds_ratio": -0.45601511001586914, "logits/chosen": 0.9116969108581543, "logits/rejected": 0.8472850918769836, "logps/chosen": -2.557854175567627, "logps/rejected": -3.507209062576294, "loss": 0.8254, "nll_loss": 0.7797586917877197, "rewards/accuracies": 0.75, "rewards/chosen": -0.25578540563583374, "rewards/margins": 0.09493549168109894, "rewards/rejected": -0.3507208824157715, "step": 2092 }, { "epoch": 5.730321697467488, "grad_norm": 2.670745849609375, "learning_rate": 7.132876712328766e-07, "log_odds_chosen": 2.2194433212280273, "log_odds_ratio": -0.24249707162380219, "logits/chosen": 0.8564935922622681, "logits/rejected": 0.8102118968963623, "logps/chosen": -1.6083394289016724, "logps/rejected": -3.648376226425171, "loss": 0.7636, "nll_loss": 0.7393673658370972, "rewards/accuracies": 1.0, "rewards/chosen": -0.1608339548110962, "rewards/margins": 0.20400367677211761, "rewards/rejected": -0.3648376166820526, "step": 2093 }, { "epoch": 5.73305954825462, "grad_norm": 2.676459312438965, "learning_rate": 7.131506849315067e-07, "log_odds_chosen": 2.582760810852051, "log_odds_ratio": -0.18107375502586365, "logits/chosen": 1.214453101158142, "logits/rejected": 1.250213623046875, "logps/chosen": -2.3460960388183594, "logps/rejected": -4.760312080383301, "loss": 0.6377, "nll_loss": 0.6195669770240784, "rewards/accuracies": 0.875, "rewards/chosen": -0.23460960388183594, "rewards/margins": 0.24142161011695862, "rewards/rejected": -0.47603121399879456, "step": 2094 }, { "epoch": 5.735797399041752, "grad_norm": 2.447890520095825, "learning_rate": 7.13013698630137e-07, "log_odds_chosen": 4.335776329040527, "log_odds_ratio": -0.07632239162921906, "logits/chosen": 0.9442911148071289, "logits/rejected": 0.9480767250061035, "logps/chosen": -2.347944498062134, "logps/rejected": -6.521463394165039, "loss": 0.7523, "nll_loss": 0.7446290850639343, "rewards/accuracies": 1.0, "rewards/chosen": -0.23479443788528442, "rewards/margins": 0.4173518419265747, "rewards/rejected": -0.6521463394165039, "step": 2095 }, { "epoch": 5.738535249828884, "grad_norm": 2.8988001346588135, "learning_rate": 7.128767123287672e-07, "log_odds_chosen": 1.6082196235656738, "log_odds_ratio": -0.4452323913574219, "logits/chosen": 0.7209354043006897, "logits/rejected": 0.7090930938720703, "logps/chosen": -1.9667054414749146, "logps/rejected": -3.4264814853668213, "loss": 0.7116, "nll_loss": 0.667106032371521, "rewards/accuracies": 0.75, "rewards/chosen": -0.1966705471277237, "rewards/margins": 0.14597758650779724, "rewards/rejected": -0.34264814853668213, "step": 2096 }, { "epoch": 5.7412731006160165, "grad_norm": 5.208447456359863, "learning_rate": 7.127397260273971e-07, "log_odds_chosen": 2.5665504932403564, "log_odds_ratio": -0.21177516877651215, "logits/chosen": 1.049025297164917, "logits/rejected": 1.064698576927185, "logps/chosen": -2.1270697116851807, "logps/rejected": -4.564967632293701, "loss": 0.7057, "nll_loss": 0.6844791173934937, "rewards/accuracies": 0.875, "rewards/chosen": -0.21270698308944702, "rewards/margins": 0.24378982186317444, "rewards/rejected": -0.4564967751502991, "step": 2097 }, { "epoch": 5.7440109514031485, "grad_norm": 2.691046953201294, "learning_rate": 7.126027397260274e-07, "log_odds_chosen": 2.728590488433838, "log_odds_ratio": -0.32521113753318787, "logits/chosen": 0.6457432508468628, "logits/rejected": 0.6285718679428101, "logps/chosen": -2.1479172706604004, "logps/rejected": -4.770629405975342, "loss": 0.7869, "nll_loss": 0.754417896270752, "rewards/accuracies": 1.0, "rewards/chosen": -0.21479174494743347, "rewards/margins": 0.2622711956501007, "rewards/rejected": -0.4770629405975342, "step": 2098 }, { "epoch": 5.746748802190281, "grad_norm": 3.261795997619629, "learning_rate": 7.124657534246576e-07, "log_odds_chosen": 1.7516427040100098, "log_odds_ratio": -0.3788842558860779, "logits/chosen": 0.4512561559677124, "logits/rejected": 0.4895600378513336, "logps/chosen": -1.8674789667129517, "logps/rejected": -3.451744318008423, "loss": 0.8017, "nll_loss": 0.7637625932693481, "rewards/accuracies": 0.75, "rewards/chosen": -0.18674790859222412, "rewards/margins": 0.15842652320861816, "rewards/rejected": -0.3451744318008423, "step": 2099 }, { "epoch": 5.749486652977413, "grad_norm": 2.74149489402771, "learning_rate": 7.123287671232876e-07, "log_odds_chosen": 1.7084758281707764, "log_odds_ratio": -0.2766168713569641, "logits/chosen": 0.6742072701454163, "logits/rejected": 0.6189791560173035, "logps/chosen": -1.752386212348938, "logps/rejected": -3.336343288421631, "loss": 0.7587, "nll_loss": 0.7310624122619629, "rewards/accuracies": 0.875, "rewards/chosen": -0.17523862421512604, "rewards/margins": 0.1583956927061081, "rewards/rejected": -0.33363431692123413, "step": 2100 }, { "epoch": 5.752224503764545, "grad_norm": 2.6952505111694336, "learning_rate": 7.121917808219178e-07, "log_odds_chosen": 1.7701003551483154, "log_odds_ratio": -0.4250221848487854, "logits/chosen": 0.7056758403778076, "logits/rejected": 0.7265152335166931, "logps/chosen": -2.94199275970459, "logps/rejected": -4.650965690612793, "loss": 0.7792, "nll_loss": 0.7366987466812134, "rewards/accuracies": 0.75, "rewards/chosen": -0.2941993176937103, "rewards/margins": 0.17089727520942688, "rewards/rejected": -0.4650965929031372, "step": 2101 }, { "epoch": 5.754962354551677, "grad_norm": 2.908515691757202, "learning_rate": 7.12054794520548e-07, "log_odds_chosen": 1.549943447113037, "log_odds_ratio": -0.2609180808067322, "logits/chosen": 0.5771365761756897, "logits/rejected": 0.49999868869781494, "logps/chosen": -1.7811994552612305, "logps/rejected": -3.180840492248535, "loss": 0.7048, "nll_loss": 0.6786835789680481, "rewards/accuracies": 1.0, "rewards/chosen": -0.178119957447052, "rewards/margins": 0.13996411859989166, "rewards/rejected": -0.31808409094810486, "step": 2102 }, { "epoch": 5.757700205338809, "grad_norm": 4.0173163414001465, "learning_rate": 7.119178082191781e-07, "log_odds_chosen": 0.8124221563339233, "log_odds_ratio": -0.5451267957687378, "logits/chosen": 0.8951596021652222, "logits/rejected": 0.8302127718925476, "logps/chosen": -2.6994965076446533, "logps/rejected": -3.414437770843506, "loss": 0.7285, "nll_loss": 0.6739847660064697, "rewards/accuracies": 0.75, "rewards/chosen": -0.26994967460632324, "rewards/margins": 0.07149411737918854, "rewards/rejected": -0.3414437770843506, "step": 2103 }, { "epoch": 5.760438056125941, "grad_norm": 2.7753775119781494, "learning_rate": 7.117808219178082e-07, "log_odds_chosen": 1.6801241636276245, "log_odds_ratio": -0.26036858558654785, "logits/chosen": 0.8877907395362854, "logits/rejected": 0.8127211928367615, "logps/chosen": -1.716841459274292, "logps/rejected": -3.248892307281494, "loss": 0.7071, "nll_loss": 0.681050181388855, "rewards/accuracies": 1.0, "rewards/chosen": -0.1716841459274292, "rewards/margins": 0.15320506691932678, "rewards/rejected": -0.32488924264907837, "step": 2104 }, { "epoch": 5.763175906913073, "grad_norm": 2.440605401992798, "learning_rate": 7.116438356164384e-07, "log_odds_chosen": 2.236022472381592, "log_odds_ratio": -0.1619665026664734, "logits/chosen": 0.9164549708366394, "logits/rejected": 0.8504685163497925, "logps/chosen": -1.5595636367797852, "logps/rejected": -3.4487271308898926, "loss": 0.7149, "nll_loss": 0.6987385749816895, "rewards/accuracies": 1.0, "rewards/chosen": -0.15595635771751404, "rewards/margins": 0.18891635537147522, "rewards/rejected": -0.34487271308898926, "step": 2105 }, { "epoch": 5.765913757700205, "grad_norm": 4.20967960357666, "learning_rate": 7.115068493150685e-07, "log_odds_chosen": 1.363399863243103, "log_odds_ratio": -0.4544239640235901, "logits/chosen": 0.5964272618293762, "logits/rejected": 0.5743824243545532, "logps/chosen": -2.089121103286743, "logps/rejected": -3.2950026988983154, "loss": 0.7235, "nll_loss": 0.6780197024345398, "rewards/accuracies": 0.875, "rewards/chosen": -0.20891210436820984, "rewards/margins": 0.12058815360069275, "rewards/rejected": -0.3295002579689026, "step": 2106 }, { "epoch": 5.768651608487337, "grad_norm": 2.6981427669525146, "learning_rate": 7.113698630136986e-07, "log_odds_chosen": 1.4297637939453125, "log_odds_ratio": -0.32586726546287537, "logits/chosen": 0.8318281173706055, "logits/rejected": 0.7642912864685059, "logps/chosen": -2.0295376777648926, "logps/rejected": -3.381288528442383, "loss": 0.7518, "nll_loss": 0.7192003726959229, "rewards/accuracies": 0.875, "rewards/chosen": -0.2029537558555603, "rewards/margins": 0.13517509400844574, "rewards/rejected": -0.33812886476516724, "step": 2107 }, { "epoch": 5.771389459274469, "grad_norm": 2.589421272277832, "learning_rate": 7.112328767123288e-07, "log_odds_chosen": 2.1439175605773926, "log_odds_ratio": -0.3854356110095978, "logits/chosen": 0.8057077527046204, "logits/rejected": 0.7794411182403564, "logps/chosen": -1.4272228479385376, "logps/rejected": -3.441718101501465, "loss": 0.8053, "nll_loss": 0.766708493232727, "rewards/accuracies": 0.875, "rewards/chosen": -0.14272227883338928, "rewards/margins": 0.20144954323768616, "rewards/rejected": -0.34417182207107544, "step": 2108 }, { "epoch": 5.774127310061601, "grad_norm": 3.4464118480682373, "learning_rate": 7.110958904109589e-07, "log_odds_chosen": 1.460665225982666, "log_odds_ratio": -0.397854208946228, "logits/chosen": 0.7251400351524353, "logits/rejected": 0.6969559788703918, "logps/chosen": -1.8497490882873535, "logps/rejected": -3.196838617324829, "loss": 0.766, "nll_loss": 0.7262331247329712, "rewards/accuracies": 0.875, "rewards/chosen": -0.18497492372989655, "rewards/margins": 0.1347089260816574, "rewards/rejected": -0.31968384981155396, "step": 2109 }, { "epoch": 5.776865160848733, "grad_norm": 2.691873550415039, "learning_rate": 7.109589041095891e-07, "log_odds_chosen": 1.8615199327468872, "log_odds_ratio": -0.2574794292449951, "logits/chosen": 0.7008748650550842, "logits/rejected": 0.666930079460144, "logps/chosen": -1.3377503156661987, "logps/rejected": -3.0062742233276367, "loss": 0.7025, "nll_loss": 0.6767576932907104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1337750405073166, "rewards/margins": 0.16685239970684052, "rewards/rejected": -0.3006274402141571, "step": 2110 }, { "epoch": 5.779603011635865, "grad_norm": 3.6925530433654785, "learning_rate": 7.108219178082191e-07, "log_odds_chosen": 1.0401231050491333, "log_odds_ratio": -0.5482553243637085, "logits/chosen": 0.9587610363960266, "logits/rejected": 0.8909327983856201, "logps/chosen": -2.614898443222046, "logps/rejected": -3.572481155395508, "loss": 0.8201, "nll_loss": 0.7652531862258911, "rewards/accuracies": 0.75, "rewards/chosen": -0.2614898383617401, "rewards/margins": 0.09575828909873962, "rewards/rejected": -0.3572481572628021, "step": 2111 }, { "epoch": 5.782340862422998, "grad_norm": 2.3835480213165283, "learning_rate": 7.106849315068493e-07, "log_odds_chosen": 1.0275297164916992, "log_odds_ratio": -0.39441484212875366, "logits/chosen": 0.6717317700386047, "logits/rejected": 0.6209700703620911, "logps/chosen": -1.5725353956222534, "logps/rejected": -2.4852936267852783, "loss": 0.74, "nll_loss": 0.7005270719528198, "rewards/accuracies": 0.875, "rewards/chosen": -0.15725353360176086, "rewards/margins": 0.09127583354711533, "rewards/rejected": -0.2485293745994568, "step": 2112 }, { "epoch": 5.78507871321013, "grad_norm": 4.2791266441345215, "learning_rate": 7.105479452054795e-07, "log_odds_chosen": 2.346235752105713, "log_odds_ratio": -0.22399654984474182, "logits/chosen": 0.9919546842575073, "logits/rejected": 0.9617221355438232, "logps/chosen": -1.8711562156677246, "logps/rejected": -3.982666015625, "loss": 0.7208, "nll_loss": 0.6984193921089172, "rewards/accuracies": 0.875, "rewards/chosen": -0.1871156394481659, "rewards/margins": 0.21115098893642426, "rewards/rejected": -0.39826661348342896, "step": 2113 }, { "epoch": 5.7878165639972625, "grad_norm": 2.4275593757629395, "learning_rate": 7.104109589041095e-07, "log_odds_chosen": 3.661325216293335, "log_odds_ratio": -0.07448546588420868, "logits/chosen": 1.1442416906356812, "logits/rejected": 1.1803184747695923, "logps/chosen": -1.6175293922424316, "logps/rejected": -4.970699787139893, "loss": 0.5767, "nll_loss": 0.5692736506462097, "rewards/accuracies": 1.0, "rewards/chosen": -0.16175293922424316, "rewards/margins": 0.33531704545021057, "rewards/rejected": -0.49706998467445374, "step": 2114 }, { "epoch": 5.790554414784395, "grad_norm": 4.431628227233887, "learning_rate": 7.102739726027397e-07, "log_odds_chosen": 1.7550840377807617, "log_odds_ratio": -0.5492442846298218, "logits/chosen": 0.7495747804641724, "logits/rejected": 0.6953803300857544, "logps/chosen": -2.207864761352539, "logps/rejected": -3.8742499351501465, "loss": 0.8145, "nll_loss": 0.7595313787460327, "rewards/accuracies": 0.75, "rewards/chosen": -0.22078648209571838, "rewards/margins": 0.1666385531425476, "rewards/rejected": -0.387425035238266, "step": 2115 }, { "epoch": 5.793292265571527, "grad_norm": 2.48332142829895, "learning_rate": 7.101369863013699e-07, "log_odds_chosen": 1.7655911445617676, "log_odds_ratio": -0.2940569519996643, "logits/chosen": 0.7913421392440796, "logits/rejected": 0.7664365768432617, "logps/chosen": -1.6502071619033813, "logps/rejected": -3.1812448501586914, "loss": 0.731, "nll_loss": 0.7016415596008301, "rewards/accuracies": 1.0, "rewards/chosen": -0.16502073407173157, "rewards/margins": 0.153103768825531, "rewards/rejected": -0.3181244730949402, "step": 2116 }, { "epoch": 5.796030116358659, "grad_norm": 2.880018472671509, "learning_rate": 7.1e-07, "log_odds_chosen": 1.493884563446045, "log_odds_ratio": -0.307338148355484, "logits/chosen": 0.6421133279800415, "logits/rejected": 0.5699067711830139, "logps/chosen": -1.7693452835083008, "logps/rejected": -3.127159357070923, "loss": 0.7394, "nll_loss": 0.7087090015411377, "rewards/accuracies": 0.875, "rewards/chosen": -0.17693454027175903, "rewards/margins": 0.1357814073562622, "rewards/rejected": -0.31271594762802124, "step": 2117 }, { "epoch": 5.798767967145791, "grad_norm": 3.9351627826690674, "learning_rate": 7.098630136986301e-07, "log_odds_chosen": 0.4776334762573242, "log_odds_ratio": -0.8452506065368652, "logits/chosen": 0.6059694290161133, "logits/rejected": 0.6870142221450806, "logps/chosen": -2.790198564529419, "logps/rejected": -3.2236366271972656, "loss": 0.7292, "nll_loss": 0.6447169184684753, "rewards/accuracies": 0.5, "rewards/chosen": -0.27901989221572876, "rewards/margins": 0.04334380105137825, "rewards/rejected": -0.3223636746406555, "step": 2118 }, { "epoch": 5.801505817932923, "grad_norm": 2.7017855644226074, "learning_rate": 7.097260273972603e-07, "log_odds_chosen": 2.284074544906616, "log_odds_ratio": -0.2368198186159134, "logits/chosen": 1.0315768718719482, "logits/rejected": 1.0823296308517456, "logps/chosen": -2.141657829284668, "logps/rejected": -4.297893047332764, "loss": 0.7003, "nll_loss": 0.6766082048416138, "rewards/accuracies": 1.0, "rewards/chosen": -0.2141657918691635, "rewards/margins": 0.21562354266643524, "rewards/rejected": -0.42978933453559875, "step": 2119 }, { "epoch": 5.804243668720055, "grad_norm": 2.477696180343628, "learning_rate": 7.095890410958904e-07, "log_odds_chosen": 1.3045827150344849, "log_odds_ratio": -0.32096919417381287, "logits/chosen": 0.8991064429283142, "logits/rejected": 0.8339352607727051, "logps/chosen": -1.664602279663086, "logps/rejected": -2.8211076259613037, "loss": 0.7368, "nll_loss": 0.7046661376953125, "rewards/accuracies": 1.0, "rewards/chosen": -0.16646024584770203, "rewards/margins": 0.11565054208040237, "rewards/rejected": -0.2821107506752014, "step": 2120 }, { "epoch": 5.806981519507187, "grad_norm": 6.253849506378174, "learning_rate": 7.094520547945205e-07, "log_odds_chosen": 2.0152745246887207, "log_odds_ratio": -0.5364859104156494, "logits/chosen": 0.8558337688446045, "logits/rejected": 0.8468620777130127, "logps/chosen": -2.6734519004821777, "logps/rejected": -4.455099105834961, "loss": 0.7966, "nll_loss": 0.742926836013794, "rewards/accuracies": 0.75, "rewards/chosen": -0.2673451900482178, "rewards/margins": 0.1781647652387619, "rewards/rejected": -0.4455099403858185, "step": 2121 }, { "epoch": 5.809719370294319, "grad_norm": 3.2570974826812744, "learning_rate": 7.093150684931507e-07, "log_odds_chosen": 1.8538742065429688, "log_odds_ratio": -0.3968935012817383, "logits/chosen": 1.041266918182373, "logits/rejected": 1.0974699258804321, "logps/chosen": -2.589423179626465, "logps/rejected": -4.386141777038574, "loss": 0.7644, "nll_loss": 0.724709689617157, "rewards/accuracies": 0.625, "rewards/chosen": -0.2589423656463623, "rewards/margins": 0.17967182397842407, "rewards/rejected": -0.4386141896247864, "step": 2122 }, { "epoch": 5.812457221081451, "grad_norm": 4.165341854095459, "learning_rate": 7.091780821917808e-07, "log_odds_chosen": 1.1346683502197266, "log_odds_ratio": -0.5276991128921509, "logits/chosen": 0.767747163772583, "logits/rejected": 0.6694109439849854, "logps/chosen": -2.4151928424835205, "logps/rejected": -3.436339855194092, "loss": 0.8038, "nll_loss": 0.751058042049408, "rewards/accuracies": 0.875, "rewards/chosen": -0.2415192872285843, "rewards/margins": 0.10211469233036041, "rewards/rejected": -0.3436339497566223, "step": 2123 }, { "epoch": 5.815195071868583, "grad_norm": 3.230241060256958, "learning_rate": 7.09041095890411e-07, "log_odds_chosen": 2.024966239929199, "log_odds_ratio": -0.2462233603000641, "logits/chosen": 0.8539925813674927, "logits/rejected": 0.8868499398231506, "logps/chosen": -3.0569448471069336, "logps/rejected": -4.9811906814575195, "loss": 0.791, "nll_loss": 0.7663370370864868, "rewards/accuracies": 0.875, "rewards/chosen": -0.3056945204734802, "rewards/margins": 0.19242453575134277, "rewards/rejected": -0.4981189966201782, "step": 2124 }, { "epoch": 5.817932922655715, "grad_norm": 2.571915864944458, "learning_rate": 7.08904109589041e-07, "log_odds_chosen": 1.8470016717910767, "log_odds_ratio": -0.3394036889076233, "logits/chosen": 0.7521835565567017, "logits/rejected": 0.7176798582077026, "logps/chosen": -1.5546218156814575, "logps/rejected": -3.232447624206543, "loss": 0.7218, "nll_loss": 0.6878912448883057, "rewards/accuracies": 0.875, "rewards/chosen": -0.15546217560768127, "rewards/margins": 0.16778258979320526, "rewards/rejected": -0.32324475049972534, "step": 2125 }, { "epoch": 5.820670773442847, "grad_norm": 4.220767021179199, "learning_rate": 7.087671232876712e-07, "log_odds_chosen": 1.6578234434127808, "log_odds_ratio": -0.36316365003585815, "logits/chosen": 0.9308019876480103, "logits/rejected": 0.923965334892273, "logps/chosen": -2.0188612937927246, "logps/rejected": -3.5521507263183594, "loss": 0.7655, "nll_loss": 0.729169487953186, "rewards/accuracies": 0.75, "rewards/chosen": -0.2018861174583435, "rewards/margins": 0.15332898497581482, "rewards/rejected": -0.3552151024341583, "step": 2126 }, { "epoch": 5.823408624229979, "grad_norm": 2.5523927211761475, "learning_rate": 7.086301369863014e-07, "log_odds_chosen": 1.4223315715789795, "log_odds_ratio": -0.34725457429885864, "logits/chosen": 0.7123595476150513, "logits/rejected": 0.6121377348899841, "logps/chosen": -1.5647293329238892, "logps/rejected": -2.8533194065093994, "loss": 0.7483, "nll_loss": 0.713525116443634, "rewards/accuracies": 0.75, "rewards/chosen": -0.15647293627262115, "rewards/margins": 0.1288590133190155, "rewards/rejected": -0.28533193469047546, "step": 2127 }, { "epoch": 5.8261464750171115, "grad_norm": 2.3590798377990723, "learning_rate": 7.084931506849314e-07, "log_odds_chosen": 1.8738694190979004, "log_odds_ratio": -0.25569862127304077, "logits/chosen": 0.7155651450157166, "logits/rejected": 0.6900178790092468, "logps/chosen": -1.9887866973876953, "logps/rejected": -3.587899923324585, "loss": 0.6964, "nll_loss": 0.670783519744873, "rewards/accuracies": 0.875, "rewards/chosen": -0.19887866079807281, "rewards/margins": 0.15991133451461792, "rewards/rejected": -0.35879001021385193, "step": 2128 }, { "epoch": 5.8288843258042435, "grad_norm": 2.356098175048828, "learning_rate": 7.083561643835616e-07, "log_odds_chosen": 2.145321846008301, "log_odds_ratio": -0.25864851474761963, "logits/chosen": 0.7732646465301514, "logits/rejected": 0.7572667598724365, "logps/chosen": -1.458545207977295, "logps/rejected": -3.4054882526397705, "loss": 0.7585, "nll_loss": 0.7326656579971313, "rewards/accuracies": 1.0, "rewards/chosen": -0.14585451781749725, "rewards/margins": 0.19469428062438965, "rewards/rejected": -0.3405488133430481, "step": 2129 }, { "epoch": 5.831622176591376, "grad_norm": 2.927734613418579, "learning_rate": 7.082191780821918e-07, "log_odds_chosen": 1.9629393815994263, "log_odds_ratio": -0.22873100638389587, "logits/chosen": 0.7158318758010864, "logits/rejected": 0.6876888275146484, "logps/chosen": -2.5248985290527344, "logps/rejected": -4.366268157958984, "loss": 0.7132, "nll_loss": 0.6902878284454346, "rewards/accuracies": 1.0, "rewards/chosen": -0.2524898648262024, "rewards/margins": 0.1841369867324829, "rewards/rejected": -0.4366268515586853, "step": 2130 }, { "epoch": 5.834360027378508, "grad_norm": 2.5743794441223145, "learning_rate": 7.080821917808219e-07, "log_odds_chosen": 2.2536449432373047, "log_odds_ratio": -0.22235196828842163, "logits/chosen": 0.6954195499420166, "logits/rejected": 0.6580675840377808, "logps/chosen": -2.266087293624878, "logps/rejected": -4.366252422332764, "loss": 0.7903, "nll_loss": 0.7680743932723999, "rewards/accuracies": 1.0, "rewards/chosen": -0.22660870850086212, "rewards/margins": 0.21001656353473663, "rewards/rejected": -0.43662527203559875, "step": 2131 }, { "epoch": 5.83709787816564, "grad_norm": 3.5046205520629883, "learning_rate": 7.07945205479452e-07, "log_odds_chosen": 1.1961166858673096, "log_odds_ratio": -0.8034504055976868, "logits/chosen": 0.6129199266433716, "logits/rejected": 0.5673426985740662, "logps/chosen": -1.856006383895874, "logps/rejected": -2.94627046585083, "loss": 0.7904, "nll_loss": 0.7100858092308044, "rewards/accuracies": 0.75, "rewards/chosen": -0.1856006532907486, "rewards/margins": 0.10902639478445053, "rewards/rejected": -0.2946270704269409, "step": 2132 }, { "epoch": 5.839835728952772, "grad_norm": 2.8177506923675537, "learning_rate": 7.078082191780822e-07, "log_odds_chosen": 2.327434539794922, "log_odds_ratio": -0.2804369628429413, "logits/chosen": 0.8952670097351074, "logits/rejected": 0.8845250010490417, "logps/chosen": -1.588379979133606, "logps/rejected": -3.757309913635254, "loss": 0.7245, "nll_loss": 0.696445882320404, "rewards/accuracies": 0.75, "rewards/chosen": -0.15883800387382507, "rewards/margins": 0.2168930023908615, "rewards/rejected": -0.3757309913635254, "step": 2133 }, { "epoch": 5.842573579739904, "grad_norm": 2.9333035945892334, "learning_rate": 7.076712328767123e-07, "log_odds_chosen": 1.602222204208374, "log_odds_ratio": -0.4236353039741516, "logits/chosen": 0.6992015838623047, "logits/rejected": 0.6541357636451721, "logps/chosen": -1.6930806636810303, "logps/rejected": -3.2060718536376953, "loss": 0.7822, "nll_loss": 0.7398465871810913, "rewards/accuracies": 0.75, "rewards/chosen": -0.16930808126926422, "rewards/margins": 0.1512991040945053, "rewards/rejected": -0.32060718536376953, "step": 2134 }, { "epoch": 5.845311430527036, "grad_norm": 2.3358142375946045, "learning_rate": 7.075342465753424e-07, "log_odds_chosen": 1.342078447341919, "log_odds_ratio": -0.33496108651161194, "logits/chosen": 0.6457921266555786, "logits/rejected": 0.657494068145752, "logps/chosen": -1.7544023990631104, "logps/rejected": -2.9841599464416504, "loss": 0.7004, "nll_loss": 0.6669483780860901, "rewards/accuracies": 0.875, "rewards/chosen": -0.17544025182724, "rewards/margins": 0.12297576665878296, "rewards/rejected": -0.29841601848602295, "step": 2135 }, { "epoch": 5.848049281314168, "grad_norm": 6.956554412841797, "learning_rate": 7.073972602739726e-07, "log_odds_chosen": 0.13510078191757202, "log_odds_ratio": -0.9721415638923645, "logits/chosen": 0.5469615459442139, "logits/rejected": 0.5101399421691895, "logps/chosen": -2.731602191925049, "logps/rejected": -2.784996509552002, "loss": 0.8513, "nll_loss": 0.7541093230247498, "rewards/accuracies": 0.625, "rewards/chosen": -0.2731602191925049, "rewards/margins": 0.005339441820979118, "rewards/rejected": -0.27849966287612915, "step": 2136 }, { "epoch": 5.850787132101301, "grad_norm": 2.6032283306121826, "learning_rate": 7.072602739726027e-07, "log_odds_chosen": 2.084031105041504, "log_odds_ratio": -0.28825482726097107, "logits/chosen": 0.5280612707138062, "logits/rejected": 0.43552976846694946, "logps/chosen": -1.5387799739837646, "logps/rejected": -3.467818260192871, "loss": 0.7273, "nll_loss": 0.6984338760375977, "rewards/accuracies": 1.0, "rewards/chosen": -0.15387800335884094, "rewards/margins": 0.19290384650230408, "rewards/rejected": -0.34678182005882263, "step": 2137 }, { "epoch": 5.853524982888432, "grad_norm": 2.6467740535736084, "learning_rate": 7.071232876712329e-07, "log_odds_chosen": 1.7451612949371338, "log_odds_ratio": -0.2966369390487671, "logits/chosen": 0.7009255290031433, "logits/rejected": 0.6521562933921814, "logps/chosen": -1.8220036029815674, "logps/rejected": -3.442777633666992, "loss": 0.7147, "nll_loss": 0.6850205659866333, "rewards/accuracies": 1.0, "rewards/chosen": -0.1822003573179245, "rewards/margins": 0.1620774120092392, "rewards/rejected": -0.3442777693271637, "step": 2138 }, { "epoch": 5.856262833675565, "grad_norm": 2.0920615196228027, "learning_rate": 7.06986301369863e-07, "log_odds_chosen": 2.4988653659820557, "log_odds_ratio": -0.19314827024936676, "logits/chosen": 0.7732652425765991, "logits/rejected": 0.7171061038970947, "logps/chosen": -1.8072216510772705, "logps/rejected": -4.151012897491455, "loss": 0.7541, "nll_loss": 0.7348113059997559, "rewards/accuracies": 1.0, "rewards/chosen": -0.18072214722633362, "rewards/margins": 0.23437917232513428, "rewards/rejected": -0.4151013493537903, "step": 2139 }, { "epoch": 5.859000684462696, "grad_norm": 3.33817458152771, "learning_rate": 7.068493150684931e-07, "log_odds_chosen": 2.7099061012268066, "log_odds_ratio": -0.3375433087348938, "logits/chosen": 1.0088367462158203, "logits/rejected": 1.057942509651184, "logps/chosen": -2.3029704093933105, "logps/rejected": -4.913543701171875, "loss": 0.8305, "nll_loss": 0.7967602014541626, "rewards/accuracies": 0.75, "rewards/chosen": -0.2302970588207245, "rewards/margins": 0.2610573470592499, "rewards/rejected": -0.49135440587997437, "step": 2140 }, { "epoch": 5.861738535249829, "grad_norm": 2.543487787246704, "learning_rate": 7.067123287671233e-07, "log_odds_chosen": 3.7791519165039062, "log_odds_ratio": -0.16977672278881073, "logits/chosen": 0.8364043235778809, "logits/rejected": 0.8354408144950867, "logps/chosen": -2.061188220977783, "logps/rejected": -5.696350574493408, "loss": 0.7104, "nll_loss": 0.693449854850769, "rewards/accuracies": 0.875, "rewards/chosen": -0.20611883699893951, "rewards/margins": 0.3635162115097046, "rewards/rejected": -0.5696350336074829, "step": 2141 }, { "epoch": 5.864476386036961, "grad_norm": 3.195516586303711, "learning_rate": 7.065753424657533e-07, "log_odds_chosen": 1.7730355262756348, "log_odds_ratio": -0.23695014417171478, "logits/chosen": 0.9939426183700562, "logits/rejected": 0.9138005971908569, "logps/chosen": -2.112553596496582, "logps/rejected": -3.770516872406006, "loss": 0.7703, "nll_loss": 0.7466403245925903, "rewards/accuracies": 1.0, "rewards/chosen": -0.21125535666942596, "rewards/margins": 0.16579633951187134, "rewards/rejected": -0.3770517110824585, "step": 2142 }, { "epoch": 5.867214236824093, "grad_norm": 2.374789237976074, "learning_rate": 7.064383561643835e-07, "log_odds_chosen": 1.3640103340148926, "log_odds_ratio": -0.2820192575454712, "logits/chosen": 0.568718671798706, "logits/rejected": 0.5519208908081055, "logps/chosen": -1.8691797256469727, "logps/rejected": -3.093463182449341, "loss": 0.6741, "nll_loss": 0.6458533406257629, "rewards/accuracies": 1.0, "rewards/chosen": -0.1869179904460907, "rewards/margins": 0.12242834270000458, "rewards/rejected": -0.3093463182449341, "step": 2143 }, { "epoch": 5.8699520876112254, "grad_norm": 2.6572885513305664, "learning_rate": 7.063013698630137e-07, "log_odds_chosen": 3.9767637252807617, "log_odds_ratio": -0.14642852544784546, "logits/chosen": 0.714094340801239, "logits/rejected": 0.6956356763839722, "logps/chosen": -2.1207289695739746, "logps/rejected": -5.974809169769287, "loss": 0.8334, "nll_loss": 0.8187559843063354, "rewards/accuracies": 1.0, "rewards/chosen": -0.21207287907600403, "rewards/margins": 0.38540807366371155, "rewards/rejected": -0.5974810123443604, "step": 2144 }, { "epoch": 5.8726899383983575, "grad_norm": 2.6543314456939697, "learning_rate": 7.061643835616438e-07, "log_odds_chosen": 2.2098870277404785, "log_odds_ratio": -0.2809532880783081, "logits/chosen": 0.9153305292129517, "logits/rejected": 0.924211859703064, "logps/chosen": -2.1374754905700684, "logps/rejected": -4.210914611816406, "loss": 0.7361, "nll_loss": 0.7079663872718811, "rewards/accuracies": 0.75, "rewards/chosen": -0.2137475460767746, "rewards/margins": 0.2073439061641693, "rewards/rejected": -0.4210914671421051, "step": 2145 }, { "epoch": 5.87542778918549, "grad_norm": 2.4868035316467285, "learning_rate": 7.060273972602739e-07, "log_odds_chosen": 2.5702569484710693, "log_odds_ratio": -0.29582226276397705, "logits/chosen": 0.6400020718574524, "logits/rejected": 0.6240515112876892, "logps/chosen": -1.9488364458084106, "logps/rejected": -4.413230895996094, "loss": 0.744, "nll_loss": 0.7143689393997192, "rewards/accuracies": 0.875, "rewards/chosen": -0.19488365948200226, "rewards/margins": 0.24643948674201965, "rewards/rejected": -0.4413231313228607, "step": 2146 }, { "epoch": 5.878165639972622, "grad_norm": 2.5576629638671875, "learning_rate": 7.058904109589041e-07, "log_odds_chosen": 2.9870219230651855, "log_odds_ratio": -0.19700884819030762, "logits/chosen": 0.7406243085861206, "logits/rejected": 0.6757022142410278, "logps/chosen": -1.7035595178604126, "logps/rejected": -4.4456281661987305, "loss": 0.8027, "nll_loss": 0.7829965353012085, "rewards/accuracies": 1.0, "rewards/chosen": -0.17035594582557678, "rewards/margins": 0.27420687675476074, "rewards/rejected": -0.4445628523826599, "step": 2147 }, { "epoch": 5.880903490759754, "grad_norm": 2.9426441192626953, "learning_rate": 7.057534246575342e-07, "log_odds_chosen": 3.2831497192382812, "log_odds_ratio": -0.15857109427452087, "logits/chosen": 0.823693037033081, "logits/rejected": 0.8003792762756348, "logps/chosen": -1.9316529035568237, "logps/rejected": -5.044439315795898, "loss": 0.6629, "nll_loss": 0.6470777988433838, "rewards/accuracies": 1.0, "rewards/chosen": -0.19316528737545013, "rewards/margins": 0.31127864122390747, "rewards/rejected": -0.504443883895874, "step": 2148 }, { "epoch": 5.883641341546886, "grad_norm": 3.965456485748291, "learning_rate": 7.056164383561643e-07, "log_odds_chosen": 1.1564385890960693, "log_odds_ratio": -0.3769279420375824, "logits/chosen": 0.5640057921409607, "logits/rejected": 0.5956266522407532, "logps/chosen": -2.238600015640259, "logps/rejected": -3.298412799835205, "loss": 0.7731, "nll_loss": 0.7353626489639282, "rewards/accuracies": 0.875, "rewards/chosen": -0.2238600105047226, "rewards/margins": 0.10598127543926239, "rewards/rejected": -0.329841285943985, "step": 2149 }, { "epoch": 5.886379192334018, "grad_norm": 2.908062219619751, "learning_rate": 7.054794520547945e-07, "log_odds_chosen": 2.4778800010681152, "log_odds_ratio": -0.25345578789711, "logits/chosen": 0.9133119583129883, "logits/rejected": 0.9047417640686035, "logps/chosen": -2.102417469024658, "logps/rejected": -4.448099136352539, "loss": 0.7372, "nll_loss": 0.711845874786377, "rewards/accuracies": 0.875, "rewards/chosen": -0.21024173498153687, "rewards/margins": 0.23456816375255585, "rewards/rejected": -0.4448099136352539, "step": 2150 }, { "epoch": 5.88911704312115, "grad_norm": 2.61377215385437, "learning_rate": 7.053424657534246e-07, "log_odds_chosen": 1.7316670417785645, "log_odds_ratio": -0.2898631989955902, "logits/chosen": 0.8533375263214111, "logits/rejected": 0.8864647746086121, "logps/chosen": -1.8740065097808838, "logps/rejected": -3.4829816818237305, "loss": 0.7306, "nll_loss": 0.7016297578811646, "rewards/accuracies": 1.0, "rewards/chosen": -0.18740065395832062, "rewards/margins": 0.16089753806591034, "rewards/rejected": -0.34829816222190857, "step": 2151 }, { "epoch": 5.891854893908282, "grad_norm": 2.7617788314819336, "learning_rate": 7.052054794520548e-07, "log_odds_chosen": 3.2687759399414062, "log_odds_ratio": -0.11215312778949738, "logits/chosen": 0.8211232423782349, "logits/rejected": 0.8056932091712952, "logps/chosen": -2.186744213104248, "logps/rejected": -5.259470462799072, "loss": 0.6733, "nll_loss": 0.662091076374054, "rewards/accuracies": 1.0, "rewards/chosen": -0.21867439150810242, "rewards/margins": 0.30727264285087585, "rewards/rejected": -0.5259470343589783, "step": 2152 }, { "epoch": 5.894592744695414, "grad_norm": 2.7748219966888428, "learning_rate": 7.050684931506849e-07, "log_odds_chosen": 1.6099451780319214, "log_odds_ratio": -0.37201017141342163, "logits/chosen": 0.816115140914917, "logits/rejected": 0.8147674202919006, "logps/chosen": -1.923346757888794, "logps/rejected": -3.4146728515625, "loss": 0.7459, "nll_loss": 0.7087039351463318, "rewards/accuracies": 0.875, "rewards/chosen": -0.19233468174934387, "rewards/margins": 0.1491325944662094, "rewards/rejected": -0.3414672911167145, "step": 2153 }, { "epoch": 5.897330595482546, "grad_norm": 3.575629949569702, "learning_rate": 7.04931506849315e-07, "log_odds_chosen": 1.530205249786377, "log_odds_ratio": -0.5149900913238525, "logits/chosen": 0.7169777154922485, "logits/rejected": 0.6972684264183044, "logps/chosen": -2.5896286964416504, "logps/rejected": -4.05513858795166, "loss": 0.7663, "nll_loss": 0.7148220539093018, "rewards/accuracies": 0.75, "rewards/chosen": -0.25896286964416504, "rewards/margins": 0.1465509980916977, "rewards/rejected": -0.40551385283470154, "step": 2154 }, { "epoch": 5.900068446269678, "grad_norm": 2.767056465148926, "learning_rate": 7.047945205479452e-07, "log_odds_chosen": 3.0756046772003174, "log_odds_ratio": -0.15059290826320648, "logits/chosen": 1.0310614109039307, "logits/rejected": 1.0805624723434448, "logps/chosen": -2.5075955390930176, "logps/rejected": -5.453227996826172, "loss": 0.7248, "nll_loss": 0.7097135782241821, "rewards/accuracies": 1.0, "rewards/chosen": -0.2507595717906952, "rewards/margins": 0.2945632338523865, "rewards/rejected": -0.545322835445404, "step": 2155 }, { "epoch": 5.90280629705681, "grad_norm": 2.974858522415161, "learning_rate": 7.046575342465752e-07, "log_odds_chosen": 1.0640716552734375, "log_odds_ratio": -0.42411959171295166, "logits/chosen": 0.8020619750022888, "logits/rejected": 0.7777668237686157, "logps/chosen": -1.3987648487091064, "logps/rejected": -2.315812587738037, "loss": 0.7348, "nll_loss": 0.6924223899841309, "rewards/accuracies": 0.875, "rewards/chosen": -0.13987646996974945, "rewards/margins": 0.09170479327440262, "rewards/rejected": -0.23158125579357147, "step": 2156 }, { "epoch": 5.905544147843942, "grad_norm": 2.376244068145752, "learning_rate": 7.045205479452054e-07, "log_odds_chosen": 2.8120384216308594, "log_odds_ratio": -0.14442013204097748, "logits/chosen": 0.9101501703262329, "logits/rejected": 0.9069217443466187, "logps/chosen": -2.053626537322998, "logps/rejected": -4.708187580108643, "loss": 0.68, "nll_loss": 0.6655203700065613, "rewards/accuracies": 1.0, "rewards/chosen": -0.20536264777183533, "rewards/margins": 0.26545611023902893, "rewards/rejected": -0.47081878781318665, "step": 2157 }, { "epoch": 5.908281998631074, "grad_norm": 2.6271135807037354, "learning_rate": 7.043835616438356e-07, "log_odds_chosen": 1.5838193893432617, "log_odds_ratio": -0.2780684232711792, "logits/chosen": 0.749243438243866, "logits/rejected": 0.7057989835739136, "logps/chosen": -1.7585769891738892, "logps/rejected": -3.2070906162261963, "loss": 0.734, "nll_loss": 0.7061957120895386, "rewards/accuracies": 1.0, "rewards/chosen": -0.17585770785808563, "rewards/margins": 0.14485135674476624, "rewards/rejected": -0.3207090497016907, "step": 2158 }, { "epoch": 5.9110198494182065, "grad_norm": 5.474133014678955, "learning_rate": 7.042465753424656e-07, "log_odds_chosen": 1.8961700201034546, "log_odds_ratio": -0.3761707544326782, "logits/chosen": 1.049528956413269, "logits/rejected": 1.0658113956451416, "logps/chosen": -2.519986391067505, "logps/rejected": -4.286410808563232, "loss": 0.699, "nll_loss": 0.661381721496582, "rewards/accuracies": 0.875, "rewards/chosen": -0.2519986629486084, "rewards/margins": 0.17664244771003723, "rewards/rejected": -0.42864108085632324, "step": 2159 }, { "epoch": 5.9137577002053385, "grad_norm": 2.5502712726593018, "learning_rate": 7.041095890410958e-07, "log_odds_chosen": 4.061855792999268, "log_odds_ratio": -0.21298548579216003, "logits/chosen": 1.0136626958847046, "logits/rejected": 1.0239582061767578, "logps/chosen": -2.481961488723755, "logps/rejected": -6.474344253540039, "loss": 0.6584, "nll_loss": 0.6371236443519592, "rewards/accuracies": 0.875, "rewards/chosen": -0.24819612503051758, "rewards/margins": 0.3992382287979126, "rewards/rejected": -0.647434413433075, "step": 2160 }, { "epoch": 5.916495550992471, "grad_norm": 3.410245180130005, "learning_rate": 7.03972602739726e-07, "log_odds_chosen": 1.7408615350723267, "log_odds_ratio": -0.36357396841049194, "logits/chosen": 0.9096978902816772, "logits/rejected": 0.9088504314422607, "logps/chosen": -2.4139013290405273, "logps/rejected": -4.076683521270752, "loss": 0.7485, "nll_loss": 0.7121585607528687, "rewards/accuracies": 0.875, "rewards/chosen": -0.2413901537656784, "rewards/margins": 0.16627822816371918, "rewards/rejected": -0.40766841173171997, "step": 2161 }, { "epoch": 5.919233401779603, "grad_norm": 2.478997230529785, "learning_rate": 7.038356164383561e-07, "log_odds_chosen": 2.3093619346618652, "log_odds_ratio": -0.17593657970428467, "logits/chosen": 0.7816973924636841, "logits/rejected": 0.7538232803344727, "logps/chosen": -1.8062660694122314, "logps/rejected": -3.9197959899902344, "loss": 0.8168, "nll_loss": 0.7991611957550049, "rewards/accuracies": 1.0, "rewards/chosen": -0.18062661588191986, "rewards/margins": 0.21135300397872925, "rewards/rejected": -0.3919796347618103, "step": 2162 }, { "epoch": 5.921971252566735, "grad_norm": 2.5473015308380127, "learning_rate": 7.036986301369862e-07, "log_odds_chosen": 1.9500794410705566, "log_odds_ratio": -0.2898237109184265, "logits/chosen": 0.6652441024780273, "logits/rejected": 0.5896502137184143, "logps/chosen": -1.3568178415298462, "logps/rejected": -3.1109066009521484, "loss": 0.7213, "nll_loss": 0.6922797560691833, "rewards/accuracies": 1.0, "rewards/chosen": -0.13568179309368134, "rewards/margins": 0.17540886998176575, "rewards/rejected": -0.3110906481742859, "step": 2163 }, { "epoch": 5.924709103353868, "grad_norm": 3.081434965133667, "learning_rate": 7.035616438356164e-07, "log_odds_chosen": 1.6874253749847412, "log_odds_ratio": -0.446025550365448, "logits/chosen": 0.8825899362564087, "logits/rejected": 0.8900290131568909, "logps/chosen": -2.495781660079956, "logps/rejected": -4.146699905395508, "loss": 0.8802, "nll_loss": 0.8355516195297241, "rewards/accuracies": 0.625, "rewards/chosen": -0.24957817792892456, "rewards/margins": 0.1650918424129486, "rewards/rejected": -0.41467005014419556, "step": 2164 }, { "epoch": 5.927446954140999, "grad_norm": 2.533449649810791, "learning_rate": 7.034246575342465e-07, "log_odds_chosen": 3.9645462036132812, "log_odds_ratio": -0.23872941732406616, "logits/chosen": 0.8699443340301514, "logits/rejected": 0.8574061393737793, "logps/chosen": -2.5038747787475586, "logps/rejected": -6.347970962524414, "loss": 0.7461, "nll_loss": 0.7221949100494385, "rewards/accuracies": 0.875, "rewards/chosen": -0.2503874897956848, "rewards/margins": 0.3844096064567566, "rewards/rejected": -0.6347970962524414, "step": 2165 }, { "epoch": 5.930184804928132, "grad_norm": 4.443012714385986, "learning_rate": 7.032876712328767e-07, "log_odds_chosen": 0.8816249370574951, "log_odds_ratio": -0.6499345898628235, "logits/chosen": 0.6038709878921509, "logits/rejected": 0.6011847257614136, "logps/chosen": -2.402768135070801, "logps/rejected": -3.2430129051208496, "loss": 0.8149, "nll_loss": 0.7499560117721558, "rewards/accuracies": 0.625, "rewards/chosen": -0.24027681350708008, "rewards/margins": 0.08402450382709503, "rewards/rejected": -0.3243013024330139, "step": 2166 }, { "epoch": 5.932922655715264, "grad_norm": 2.515868663787842, "learning_rate": 7.031506849315068e-07, "log_odds_chosen": 1.8976020812988281, "log_odds_ratio": -0.2791009843349457, "logits/chosen": 0.8749129176139832, "logits/rejected": 0.8548527956008911, "logps/chosen": -1.3803726434707642, "logps/rejected": -3.070556640625, "loss": 0.6308, "nll_loss": 0.602915346622467, "rewards/accuracies": 0.875, "rewards/chosen": -0.13803726434707642, "rewards/margins": 0.16901835799217224, "rewards/rejected": -0.30705565214157104, "step": 2167 }, { "epoch": 5.935660506502396, "grad_norm": 2.9127917289733887, "learning_rate": 7.030136986301369e-07, "log_odds_chosen": 1.264224648475647, "log_odds_ratio": -0.4037570059299469, "logits/chosen": 0.7387717366218567, "logits/rejected": 0.6490811705589294, "logps/chosen": -2.192157030105591, "logps/rejected": -3.361267328262329, "loss": 0.7975, "nll_loss": 0.7571629881858826, "rewards/accuracies": 0.75, "rewards/chosen": -0.21921572089195251, "rewards/margins": 0.11691102385520935, "rewards/rejected": -0.33612674474716187, "step": 2168 }, { "epoch": 5.938398357289528, "grad_norm": 3.808666467666626, "learning_rate": 7.028767123287671e-07, "log_odds_chosen": 3.8411622047424316, "log_odds_ratio": -0.12109147012233734, "logits/chosen": 0.9536293148994446, "logits/rejected": 0.9251116514205933, "logps/chosen": -2.4922842979431152, "logps/rejected": -6.177435398101807, "loss": 0.8057, "nll_loss": 0.7935518622398376, "rewards/accuracies": 1.0, "rewards/chosen": -0.24922841787338257, "rewards/margins": 0.36851513385772705, "rewards/rejected": -0.6177435517311096, "step": 2169 }, { "epoch": 5.94113620807666, "grad_norm": 5.803213596343994, "learning_rate": 7.027397260273972e-07, "log_odds_chosen": 2.21272873878479, "log_odds_ratio": -0.6743423938751221, "logits/chosen": 0.8135967254638672, "logits/rejected": 0.7678180932998657, "logps/chosen": -2.7023696899414062, "logps/rejected": -4.822443008422852, "loss": 0.8227, "nll_loss": 0.7552659511566162, "rewards/accuracies": 0.75, "rewards/chosen": -0.270236998796463, "rewards/margins": 0.2120072841644287, "rewards/rejected": -0.4822443127632141, "step": 2170 }, { "epoch": 5.943874058863792, "grad_norm": 3.271470308303833, "learning_rate": 7.026027397260273e-07, "log_odds_chosen": 1.9114993810653687, "log_odds_ratio": -0.30598101019859314, "logits/chosen": 0.7045296430587769, "logits/rejected": 0.6494425535202026, "logps/chosen": -2.14774751663208, "logps/rejected": -3.936025381088257, "loss": 0.7947, "nll_loss": 0.7640990018844604, "rewards/accuracies": 0.875, "rewards/chosen": -0.21477475762367249, "rewards/margins": 0.17882776260375977, "rewards/rejected": -0.39360255002975464, "step": 2171 }, { "epoch": 5.946611909650924, "grad_norm": 2.388151168823242, "learning_rate": 7.024657534246575e-07, "log_odds_chosen": 4.231420516967773, "log_odds_ratio": -0.14559613168239594, "logits/chosen": 0.783025860786438, "logits/rejected": 0.7410135269165039, "logps/chosen": -2.537332057952881, "logps/rejected": -6.651756286621094, "loss": 0.8456, "nll_loss": 0.8310660719871521, "rewards/accuracies": 1.0, "rewards/chosen": -0.25373321771621704, "rewards/margins": 0.41144245862960815, "rewards/rejected": -0.6651756763458252, "step": 2172 }, { "epoch": 5.949349760438056, "grad_norm": 2.938415050506592, "learning_rate": 7.023287671232875e-07, "log_odds_chosen": 1.8253018856048584, "log_odds_ratio": -0.23595274984836578, "logits/chosen": 0.6372775435447693, "logits/rejected": 0.5219087600708008, "logps/chosen": -2.252708911895752, "logps/rejected": -3.9821269512176514, "loss": 0.8057, "nll_loss": 0.7821051478385925, "rewards/accuracies": 1.0, "rewards/chosen": -0.22527088224887848, "rewards/margins": 0.17294183373451233, "rewards/rejected": -0.398212730884552, "step": 2173 }, { "epoch": 5.952087611225188, "grad_norm": 3.2961418628692627, "learning_rate": 7.021917808219177e-07, "log_odds_chosen": 1.5317397117614746, "log_odds_ratio": -0.4767177104949951, "logits/chosen": 0.9178462028503418, "logits/rejected": 0.9016053676605225, "logps/chosen": -2.9409656524658203, "logps/rejected": -4.382219314575195, "loss": 0.8357, "nll_loss": 0.7880399227142334, "rewards/accuracies": 0.875, "rewards/chosen": -0.29409658908843994, "rewards/margins": 0.14412541687488556, "rewards/rejected": -0.4382219910621643, "step": 2174 }, { "epoch": 5.95482546201232, "grad_norm": 3.3030037879943848, "learning_rate": 7.02054794520548e-07, "log_odds_chosen": 1.5879194736480713, "log_odds_ratio": -0.3195107877254486, "logits/chosen": 0.8944010138511658, "logits/rejected": 0.8867493867874146, "logps/chosen": -2.471855878829956, "logps/rejected": -3.9395551681518555, "loss": 0.754, "nll_loss": 0.7220401763916016, "rewards/accuracies": 1.0, "rewards/chosen": -0.2471855878829956, "rewards/margins": 0.14676989614963531, "rewards/rejected": -0.3939554989337921, "step": 2175 }, { "epoch": 5.9575633127994525, "grad_norm": 2.2282605171203613, "learning_rate": 7.01917808219178e-07, "log_odds_chosen": 1.6517009735107422, "log_odds_ratio": -0.20414197444915771, "logits/chosen": 0.6294954419136047, "logits/rejected": 0.5900560021400452, "logps/chosen": -1.6568293571472168, "logps/rejected": -3.1317567825317383, "loss": 0.7094, "nll_loss": 0.6889803409576416, "rewards/accuracies": 1.0, "rewards/chosen": -0.16568294167518616, "rewards/margins": 0.14749273657798767, "rewards/rejected": -0.31317567825317383, "step": 2176 }, { "epoch": 5.960301163586585, "grad_norm": 3.2240402698516846, "learning_rate": 7.017808219178082e-07, "log_odds_chosen": 1.6664080619812012, "log_odds_ratio": -0.31350064277648926, "logits/chosen": 1.0038163661956787, "logits/rejected": 1.0107592344284058, "logps/chosen": -2.0484254360198975, "logps/rejected": -3.5972414016723633, "loss": 0.7396, "nll_loss": 0.7082198858261108, "rewards/accuracies": 1.0, "rewards/chosen": -0.20484253764152527, "rewards/margins": 0.15488161146640778, "rewards/rejected": -0.35972413420677185, "step": 2177 }, { "epoch": 5.963039014373717, "grad_norm": 2.735060453414917, "learning_rate": 7.016438356164384e-07, "log_odds_chosen": 3.164316177368164, "log_odds_ratio": -0.10182242095470428, "logits/chosen": 1.0056676864624023, "logits/rejected": 0.9648768901824951, "logps/chosen": -2.0700926780700684, "logps/rejected": -5.083371162414551, "loss": 0.7306, "nll_loss": 0.7204289436340332, "rewards/accuracies": 1.0, "rewards/chosen": -0.20700925588607788, "rewards/margins": 0.30132782459259033, "rewards/rejected": -0.508337140083313, "step": 2178 }, { "epoch": 5.965776865160849, "grad_norm": 2.2576982975006104, "learning_rate": 7.015068493150685e-07, "log_odds_chosen": 2.2795729637145996, "log_odds_ratio": -0.19057399034500122, "logits/chosen": 0.7614428997039795, "logits/rejected": 0.7209410667419434, "logps/chosen": -1.5365941524505615, "logps/rejected": -3.5447816848754883, "loss": 0.7127, "nll_loss": 0.6936856508255005, "rewards/accuracies": 1.0, "rewards/chosen": -0.1536594182252884, "rewards/margins": 0.20081877708435059, "rewards/rejected": -0.3544781804084778, "step": 2179 }, { "epoch": 5.968514715947981, "grad_norm": 2.5906293392181396, "learning_rate": 7.013698630136986e-07, "log_odds_chosen": 1.6415913105010986, "log_odds_ratio": -0.2528262734413147, "logits/chosen": 0.5067689418792725, "logits/rejected": 0.4497750401496887, "logps/chosen": -2.1301121711730957, "logps/rejected": -3.6103873252868652, "loss": 0.7728, "nll_loss": 0.7475149035453796, "rewards/accuracies": 0.875, "rewards/chosen": -0.213011234998703, "rewards/margins": 0.14802750945091248, "rewards/rejected": -0.3610387444496155, "step": 2180 }, { "epoch": 5.971252566735113, "grad_norm": 2.5416886806488037, "learning_rate": 7.012328767123288e-07, "log_odds_chosen": 2.2573463916778564, "log_odds_ratio": -0.24264420568943024, "logits/chosen": 0.5576277375221252, "logits/rejected": 0.5038673877716064, "logps/chosen": -1.7831792831420898, "logps/rejected": -3.8516039848327637, "loss": 0.7493, "nll_loss": 0.7250628471374512, "rewards/accuracies": 0.875, "rewards/chosen": -0.17831793427467346, "rewards/margins": 0.20684246718883514, "rewards/rejected": -0.3851603865623474, "step": 2181 }, { "epoch": 5.973990417522245, "grad_norm": 2.4147109985351562, "learning_rate": 7.010958904109589e-07, "log_odds_chosen": 2.8201470375061035, "log_odds_ratio": -0.29302555322647095, "logits/chosen": 0.5590918660163879, "logits/rejected": 0.5316616296768188, "logps/chosen": -1.7885963916778564, "logps/rejected": -4.471011161804199, "loss": 0.8198, "nll_loss": 0.7904640436172485, "rewards/accuracies": 0.875, "rewards/chosen": -0.1788596361875534, "rewards/margins": 0.2682415246963501, "rewards/rejected": -0.4471011757850647, "step": 2182 }, { "epoch": 5.976728268309377, "grad_norm": 4.324696063995361, "learning_rate": 7.009589041095891e-07, "log_odds_chosen": 1.4399043321609497, "log_odds_ratio": -0.5732827186584473, "logits/chosen": 0.5441060662269592, "logits/rejected": 0.48049396276474, "logps/chosen": -2.2069969177246094, "logps/rejected": -3.544408082962036, "loss": 0.7732, "nll_loss": 0.7158803343772888, "rewards/accuracies": 0.875, "rewards/chosen": -0.22069969773292542, "rewards/margins": 0.1337411105632782, "rewards/rejected": -0.3544408082962036, "step": 2183 }, { "epoch": 5.979466119096509, "grad_norm": 2.514564037322998, "learning_rate": 7.008219178082192e-07, "log_odds_chosen": 1.7294261455535889, "log_odds_ratio": -0.3114475905895233, "logits/chosen": 0.6898239850997925, "logits/rejected": 0.7025699615478516, "logps/chosen": -1.63108491897583, "logps/rejected": -3.186876058578491, "loss": 0.7652, "nll_loss": 0.7340402603149414, "rewards/accuracies": 0.875, "rewards/chosen": -0.16310849785804749, "rewards/margins": 0.1555791199207306, "rewards/rejected": -0.3186876177787781, "step": 2184 }, { "epoch": 5.982203969883641, "grad_norm": 9.490707397460938, "learning_rate": 7.006849315068493e-07, "log_odds_chosen": 0.9340832829475403, "log_odds_ratio": -0.5869590044021606, "logits/chosen": 0.9250791072845459, "logits/rejected": 0.8845549821853638, "logps/chosen": -2.6798369884490967, "logps/rejected": -3.4863882064819336, "loss": 0.7427, "nll_loss": 0.6839975714683533, "rewards/accuracies": 0.75, "rewards/chosen": -0.26798370480537415, "rewards/margins": 0.08065511286258698, "rewards/rejected": -0.3486388325691223, "step": 2185 }, { "epoch": 5.984941820670773, "grad_norm": 2.9106218814849854, "learning_rate": 7.005479452054795e-07, "log_odds_chosen": 1.706950068473816, "log_odds_ratio": -0.5087631344795227, "logits/chosen": 0.9109792709350586, "logits/rejected": 0.8385187983512878, "logps/chosen": -2.158859968185425, "logps/rejected": -3.7087247371673584, "loss": 0.7998, "nll_loss": 0.748876690864563, "rewards/accuracies": 0.75, "rewards/chosen": -0.21588599681854248, "rewards/margins": 0.15498648583889008, "rewards/rejected": -0.37087246775627136, "step": 2186 }, { "epoch": 5.987679671457905, "grad_norm": 3.007314920425415, "learning_rate": 7.004109589041095e-07, "log_odds_chosen": 2.064263343811035, "log_odds_ratio": -0.25657135248184204, "logits/chosen": 0.9066908359527588, "logits/rejected": 0.9157651662826538, "logps/chosen": -2.0856363773345947, "logps/rejected": -4.032657623291016, "loss": 0.6747, "nll_loss": 0.6490190625190735, "rewards/accuracies": 0.875, "rewards/chosen": -0.2085636556148529, "rewards/margins": 0.1947021633386612, "rewards/rejected": -0.4032658040523529, "step": 2187 }, { "epoch": 5.990417522245037, "grad_norm": 2.4612791538238525, "learning_rate": 7.002739726027397e-07, "log_odds_chosen": 1.9338207244873047, "log_odds_ratio": -0.3026152551174164, "logits/chosen": 0.8471387028694153, "logits/rejected": 0.8497492074966431, "logps/chosen": -1.7198235988616943, "logps/rejected": -3.4508585929870605, "loss": 0.766, "nll_loss": 0.7357179522514343, "rewards/accuracies": 0.875, "rewards/chosen": -0.17198236286640167, "rewards/margins": 0.17310354113578796, "rewards/rejected": -0.34508588910102844, "step": 2188 }, { "epoch": 5.993155373032169, "grad_norm": 6.311845302581787, "learning_rate": 7.001369863013699e-07, "log_odds_chosen": 2.1210975646972656, "log_odds_ratio": -0.7811406850814819, "logits/chosen": 1.1826660633087158, "logits/rejected": 1.2096163034439087, "logps/chosen": -3.6366708278656006, "logps/rejected": -5.712389945983887, "loss": 0.843, "nll_loss": 0.7648913264274597, "rewards/accuracies": 0.875, "rewards/chosen": -0.3636670708656311, "rewards/margins": 0.20757190883159637, "rewards/rejected": -0.5712389945983887, "step": 2189 }, { "epoch": 5.9958932238193015, "grad_norm": 3.193186044692993, "learning_rate": 7e-07, "log_odds_chosen": 2.7770938873291016, "log_odds_ratio": -0.23132099211215973, "logits/chosen": 0.7109051942825317, "logits/rejected": 0.6632909774780273, "logps/chosen": -1.905411958694458, "logps/rejected": -4.568070411682129, "loss": 0.8372, "nll_loss": 0.8141000270843506, "rewards/accuracies": 0.875, "rewards/chosen": -0.19054119288921356, "rewards/margins": 0.266265869140625, "rewards/rejected": -0.45680704712867737, "step": 2190 }, { "epoch": 5.998631074606434, "grad_norm": 2.6346869468688965, "learning_rate": 6.998630136986301e-07, "log_odds_chosen": 1.0693416595458984, "log_odds_ratio": -0.5012602806091309, "logits/chosen": 0.6912287473678589, "logits/rejected": 0.6659078598022461, "logps/chosen": -1.8963782787322998, "logps/rejected": -2.846445083618164, "loss": 0.806, "nll_loss": 0.7558473348617554, "rewards/accuracies": 0.75, "rewards/chosen": -0.18963783979415894, "rewards/margins": 0.09500670433044434, "rewards/rejected": -0.28464454412460327, "step": 2191 }, { "epoch": 6.0013689253935665, "grad_norm": 4.766686916351318, "learning_rate": 6.997260273972603e-07, "log_odds_chosen": 2.356210708618164, "log_odds_ratio": -0.34116190671920776, "logits/chosen": 0.5996959209442139, "logits/rejected": 0.5607486963272095, "logps/chosen": -2.1788482666015625, "logps/rejected": -4.368764400482178, "loss": 0.8278, "nll_loss": 0.7936388254165649, "rewards/accuracies": 0.875, "rewards/chosen": -0.217884823679924, "rewards/margins": 0.21899163722991943, "rewards/rejected": -0.43687647581100464, "step": 2192 }, { "epoch": 6.0041067761806985, "grad_norm": 2.635774612426758, "learning_rate": 6.995890410958904e-07, "log_odds_chosen": 2.7960000038146973, "log_odds_ratio": -0.2789972424507141, "logits/chosen": 0.6141960024833679, "logits/rejected": 0.4973964989185333, "logps/chosen": -2.180509328842163, "logps/rejected": -4.8674468994140625, "loss": 0.755, "nll_loss": 0.7271419763565063, "rewards/accuracies": 0.875, "rewards/chosen": -0.21805092692375183, "rewards/margins": 0.268693745136261, "rewards/rejected": -0.48674464225769043, "step": 2193 }, { "epoch": 6.006844626967831, "grad_norm": 3.7010674476623535, "learning_rate": 6.994520547945205e-07, "log_odds_chosen": 1.2473719120025635, "log_odds_ratio": -0.425643652677536, "logits/chosen": 0.8382794857025146, "logits/rejected": 0.8107982873916626, "logps/chosen": -2.5131735801696777, "logps/rejected": -3.6518795490264893, "loss": 0.7459, "nll_loss": 0.7033421993255615, "rewards/accuracies": 0.875, "rewards/chosen": -0.2513173818588257, "rewards/margins": 0.11387059092521667, "rewards/rejected": -0.36518794298171997, "step": 2194 }, { "epoch": 6.009582477754963, "grad_norm": 2.4785003662109375, "learning_rate": 6.993150684931507e-07, "log_odds_chosen": 1.6287622451782227, "log_odds_ratio": -0.26389849185943604, "logits/chosen": 0.7220639586448669, "logits/rejected": 0.6936706900596619, "logps/chosen": -3.1848127841949463, "logps/rejected": -4.711429595947266, "loss": 0.7586, "nll_loss": 0.7321804761886597, "rewards/accuracies": 0.875, "rewards/chosen": -0.31848129630088806, "rewards/margins": 0.15266166627407074, "rewards/rejected": -0.4711429476737976, "step": 2195 }, { "epoch": 6.012320328542095, "grad_norm": 2.8148958683013916, "learning_rate": 6.991780821917808e-07, "log_odds_chosen": 3.1309947967529297, "log_odds_ratio": -0.11596283316612244, "logits/chosen": 0.9845012426376343, "logits/rejected": 0.9825003147125244, "logps/chosen": -1.6029881238937378, "logps/rejected": -4.4886674880981445, "loss": 0.6965, "nll_loss": 0.6848719716072083, "rewards/accuracies": 1.0, "rewards/chosen": -0.16029880940914154, "rewards/margins": 0.2885679304599762, "rewards/rejected": -0.44886672496795654, "step": 2196 }, { "epoch": 6.015058179329227, "grad_norm": 3.632704019546509, "learning_rate": 6.99041095890411e-07, "log_odds_chosen": 0.7786267995834351, "log_odds_ratio": -0.4668157398700714, "logits/chosen": 0.791192889213562, "logits/rejected": 0.7693020105361938, "logps/chosen": -2.316728115081787, "logps/rejected": -2.9983818531036377, "loss": 0.691, "nll_loss": 0.6443313360214233, "rewards/accuracies": 0.875, "rewards/chosen": -0.23167282342910767, "rewards/margins": 0.0681653618812561, "rewards/rejected": -0.29983818531036377, "step": 2197 }, { "epoch": 6.017796030116359, "grad_norm": 3.0280885696411133, "learning_rate": 6.989041095890411e-07, "log_odds_chosen": 2.7682766914367676, "log_odds_ratio": -0.2102474570274353, "logits/chosen": 0.998583197593689, "logits/rejected": 1.0086262226104736, "logps/chosen": -2.142608880996704, "logps/rejected": -4.797736167907715, "loss": 0.7724, "nll_loss": 0.7513296604156494, "rewards/accuracies": 1.0, "rewards/chosen": -0.21426090598106384, "rewards/margins": 0.26551276445388794, "rewards/rejected": -0.4797736406326294, "step": 2198 }, { "epoch": 6.020533880903491, "grad_norm": 2.3757331371307373, "learning_rate": 6.987671232876712e-07, "log_odds_chosen": 2.512749671936035, "log_odds_ratio": -0.17872996628284454, "logits/chosen": 0.5979491472244263, "logits/rejected": 0.5323395133018494, "logps/chosen": -1.683066964149475, "logps/rejected": -4.034967422485352, "loss": 0.7464, "nll_loss": 0.728502094745636, "rewards/accuracies": 1.0, "rewards/chosen": -0.16830670833587646, "rewards/margins": 0.2351900190114975, "rewards/rejected": -0.40349671244621277, "step": 2199 }, { "epoch": 6.023271731690623, "grad_norm": 2.8499059677124023, "learning_rate": 6.986301369863014e-07, "log_odds_chosen": 1.5644785165786743, "log_odds_ratio": -0.33646368980407715, "logits/chosen": 0.5617800951004028, "logits/rejected": 0.47527599334716797, "logps/chosen": -2.0703623294830322, "logps/rejected": -3.4966373443603516, "loss": 0.7525, "nll_loss": 0.7188578844070435, "rewards/accuracies": 0.75, "rewards/chosen": -0.20703624188899994, "rewards/margins": 0.1426275074481964, "rewards/rejected": -0.34966373443603516, "step": 2200 }, { "epoch": 6.026009582477755, "grad_norm": 2.487461805343628, "learning_rate": 6.984931506849314e-07, "log_odds_chosen": 2.174880027770996, "log_odds_ratio": -0.20488965511322021, "logits/chosen": 0.8128194808959961, "logits/rejected": 0.8153660297393799, "logps/chosen": -1.9387590885162354, "logps/rejected": -3.990600109100342, "loss": 0.6296, "nll_loss": 0.6090652942657471, "rewards/accuracies": 1.0, "rewards/chosen": -0.19387590885162354, "rewards/margins": 0.20518410205841064, "rewards/rejected": -0.3990600109100342, "step": 2201 }, { "epoch": 6.028747433264887, "grad_norm": 4.039867877960205, "learning_rate": 6.983561643835616e-07, "log_odds_chosen": 1.9127850532531738, "log_odds_ratio": -0.35829246044158936, "logits/chosen": 0.8477240204811096, "logits/rejected": 0.8376560807228088, "logps/chosen": -2.4372949600219727, "logps/rejected": -4.264217376708984, "loss": 0.8459, "nll_loss": 0.8100473880767822, "rewards/accuracies": 0.75, "rewards/chosen": -0.24372951686382294, "rewards/margins": 0.18269221484661102, "rewards/rejected": -0.42642173171043396, "step": 2202 }, { "epoch": 6.031485284052019, "grad_norm": 3.810636281967163, "learning_rate": 6.982191780821918e-07, "log_odds_chosen": 1.6907322406768799, "log_odds_ratio": -0.41271913051605225, "logits/chosen": 0.45227667689323425, "logits/rejected": 0.36109858751296997, "logps/chosen": -1.8019462823867798, "logps/rejected": -3.3280816078186035, "loss": 0.7584, "nll_loss": 0.7171434760093689, "rewards/accuracies": 0.875, "rewards/chosen": -0.1801946461200714, "rewards/margins": 0.1526135355234146, "rewards/rejected": -0.33280816674232483, "step": 2203 }, { "epoch": 6.034223134839151, "grad_norm": 3.2858381271362305, "learning_rate": 6.980821917808219e-07, "log_odds_chosen": 1.6370203495025635, "log_odds_ratio": -0.3748657703399658, "logits/chosen": 0.8522529602050781, "logits/rejected": 0.8692789673805237, "logps/chosen": -2.2373125553131104, "logps/rejected": -3.8039727210998535, "loss": 0.8499, "nll_loss": 0.8124022483825684, "rewards/accuracies": 0.875, "rewards/chosen": -0.22373127937316895, "rewards/margins": 0.15666601061820984, "rewards/rejected": -0.3803972899913788, "step": 2204 }, { "epoch": 6.036960985626283, "grad_norm": 3.1839406490325928, "learning_rate": 6.97945205479452e-07, "log_odds_chosen": 2.20882511138916, "log_odds_ratio": -0.22902724146842957, "logits/chosen": 1.029433250427246, "logits/rejected": 1.0373814105987549, "logps/chosen": -1.8713405132293701, "logps/rejected": -3.9774813652038574, "loss": 0.6717, "nll_loss": 0.6488089561462402, "rewards/accuracies": 0.875, "rewards/chosen": -0.1871340572834015, "rewards/margins": 0.21061411499977112, "rewards/rejected": -0.3977481722831726, "step": 2205 }, { "epoch": 6.039698836413415, "grad_norm": 2.938915252685547, "learning_rate": 6.978082191780822e-07, "log_odds_chosen": 3.0210392475128174, "log_odds_ratio": -0.21913889050483704, "logits/chosen": 0.7462217211723328, "logits/rejected": 0.7472008466720581, "logps/chosen": -2.141676425933838, "logps/rejected": -5.032949447631836, "loss": 0.7059, "nll_loss": 0.6839525699615479, "rewards/accuracies": 0.875, "rewards/chosen": -0.21416765451431274, "rewards/margins": 0.28912729024887085, "rewards/rejected": -0.5032948851585388, "step": 2206 }, { "epoch": 6.0424366872005475, "grad_norm": 2.8031160831451416, "learning_rate": 6.976712328767123e-07, "log_odds_chosen": 2.985686779022217, "log_odds_ratio": -0.1609671413898468, "logits/chosen": 0.970882773399353, "logits/rejected": 0.9028991460800171, "logps/chosen": -2.0633673667907715, "logps/rejected": -4.909865379333496, "loss": 0.8339, "nll_loss": 0.8177767992019653, "rewards/accuracies": 1.0, "rewards/chosen": -0.20633675158023834, "rewards/margins": 0.2846497893333435, "rewards/rejected": -0.49098655581474304, "step": 2207 }, { "epoch": 6.04517453798768, "grad_norm": 2.8617870807647705, "learning_rate": 6.975342465753424e-07, "log_odds_chosen": 1.5420570373535156, "log_odds_ratio": -0.29339420795440674, "logits/chosen": 0.47976043820381165, "logits/rejected": 0.37459173798561096, "logps/chosen": -1.4776047468185425, "logps/rejected": -2.8277835845947266, "loss": 0.8176, "nll_loss": 0.7882367372512817, "rewards/accuracies": 1.0, "rewards/chosen": -0.14776046574115753, "rewards/margins": 0.13501788675785065, "rewards/rejected": -0.2827783524990082, "step": 2208 }, { "epoch": 6.047912388774812, "grad_norm": 3.2380964756011963, "learning_rate": 6.973972602739726e-07, "log_odds_chosen": 2.2549655437469482, "log_odds_ratio": -0.1953224241733551, "logits/chosen": 0.7488088607788086, "logits/rejected": 0.7071284055709839, "logps/chosen": -1.393463134765625, "logps/rejected": -3.413330078125, "loss": 0.828, "nll_loss": 0.8084218502044678, "rewards/accuracies": 1.0, "rewards/chosen": -0.13934631645679474, "rewards/margins": 0.20198670029640198, "rewards/rejected": -0.3413330316543579, "step": 2209 }, { "epoch": 6.050650239561944, "grad_norm": 2.8853797912597656, "learning_rate": 6.972602739726027e-07, "log_odds_chosen": 0.8132555484771729, "log_odds_ratio": -0.4104917645454407, "logits/chosen": 0.6683619022369385, "logits/rejected": 0.5771647095680237, "logps/chosen": -1.622596025466919, "logps/rejected": -2.3346660137176514, "loss": 0.75, "nll_loss": 0.7089066505432129, "rewards/accuracies": 1.0, "rewards/chosen": -0.16225962340831757, "rewards/margins": 0.07120698690414429, "rewards/rejected": -0.23346659541130066, "step": 2210 }, { "epoch": 6.053388090349076, "grad_norm": 2.816150188446045, "learning_rate": 6.971232876712329e-07, "log_odds_chosen": 0.8650714159011841, "log_odds_ratio": -0.43750637769699097, "logits/chosen": 0.9692825078964233, "logits/rejected": 0.9818613529205322, "logps/chosen": -1.8708529472351074, "logps/rejected": -2.6343636512756348, "loss": 0.6958, "nll_loss": 0.6520124673843384, "rewards/accuracies": 0.75, "rewards/chosen": -0.18708530068397522, "rewards/margins": 0.07635108381509781, "rewards/rejected": -0.26343634724617004, "step": 2211 }, { "epoch": 6.056125941136208, "grad_norm": 2.61891508102417, "learning_rate": 6.96986301369863e-07, "log_odds_chosen": 3.312580108642578, "log_odds_ratio": -0.21250349283218384, "logits/chosen": 1.0611244440078735, "logits/rejected": 1.0770167112350464, "logps/chosen": -1.9666986465454102, "logps/rejected": -5.133064270019531, "loss": 0.684, "nll_loss": 0.6627166867256165, "rewards/accuracies": 1.0, "rewards/chosen": -0.19666986167430878, "rewards/margins": 0.3166365325450897, "rewards/rejected": -0.5133064389228821, "step": 2212 }, { "epoch": 6.05886379192334, "grad_norm": 2.7744061946868896, "learning_rate": 6.968493150684931e-07, "log_odds_chosen": 2.050473690032959, "log_odds_ratio": -0.35169586539268494, "logits/chosen": 0.9628945589065552, "logits/rejected": 0.9292256832122803, "logps/chosen": -1.5285773277282715, "logps/rejected": -3.4123594760894775, "loss": 0.6856, "nll_loss": 0.6504467725753784, "rewards/accuracies": 0.75, "rewards/chosen": -0.1528577208518982, "rewards/margins": 0.1883782297372818, "rewards/rejected": -0.3412359356880188, "step": 2213 }, { "epoch": 6.061601642710472, "grad_norm": 3.611297607421875, "learning_rate": 6.967123287671233e-07, "log_odds_chosen": 1.1386656761169434, "log_odds_ratio": -0.4647671580314636, "logits/chosen": 0.7814176082611084, "logits/rejected": 0.7578792572021484, "logps/chosen": -2.220641613006592, "logps/rejected": -3.2598538398742676, "loss": 0.7827, "nll_loss": 0.7361973524093628, "rewards/accuracies": 0.875, "rewards/chosen": -0.22206416726112366, "rewards/margins": 0.10392121970653534, "rewards/rejected": -0.3259853720664978, "step": 2214 }, { "epoch": 6.064339493497604, "grad_norm": 2.992809772491455, "learning_rate": 6.965753424657534e-07, "log_odds_chosen": 1.60471773147583, "log_odds_ratio": -0.33599331974983215, "logits/chosen": 0.9007201790809631, "logits/rejected": 0.8919845819473267, "logps/chosen": -2.7021708488464355, "logps/rejected": -4.206356525421143, "loss": 0.7496, "nll_loss": 0.7159677743911743, "rewards/accuracies": 0.875, "rewards/chosen": -0.27021706104278564, "rewards/margins": 0.15041857957839966, "rewards/rejected": -0.4206356406211853, "step": 2215 }, { "epoch": 6.067077344284736, "grad_norm": 3.2008841037750244, "learning_rate": 6.964383561643835e-07, "log_odds_chosen": 2.0116124153137207, "log_odds_ratio": -0.29122304916381836, "logits/chosen": 0.884146511554718, "logits/rejected": 0.8810847997665405, "logps/chosen": -2.4334731101989746, "logps/rejected": -4.342562675476074, "loss": 0.771, "nll_loss": 0.7418810129165649, "rewards/accuracies": 0.875, "rewards/chosen": -0.24334733188152313, "rewards/margins": 0.19090892374515533, "rewards/rejected": -0.43425625562667847, "step": 2216 }, { "epoch": 6.069815195071868, "grad_norm": 2.689068078994751, "learning_rate": 6.963013698630137e-07, "log_odds_chosen": 3.0473761558532715, "log_odds_ratio": -0.13732466101646423, "logits/chosen": 0.9144518375396729, "logits/rejected": 0.8805680274963379, "logps/chosen": -2.0674068927764893, "logps/rejected": -4.914238929748535, "loss": 0.7113, "nll_loss": 0.6976054906845093, "rewards/accuracies": 1.0, "rewards/chosen": -0.20674067735671997, "rewards/margins": 0.2846832275390625, "rewards/rejected": -0.49142390489578247, "step": 2217 }, { "epoch": 6.072553045859001, "grad_norm": 5.8254923820495605, "learning_rate": 6.961643835616438e-07, "log_odds_chosen": 1.1197118759155273, "log_odds_ratio": -0.8189942836761475, "logits/chosen": 0.681671142578125, "logits/rejected": 0.6209578514099121, "logps/chosen": -2.3817596435546875, "logps/rejected": -3.3641204833984375, "loss": 0.8711, "nll_loss": 0.7892412543296814, "rewards/accuracies": 0.875, "rewards/chosen": -0.23817598819732666, "rewards/margins": 0.098236083984375, "rewards/rejected": -0.33641207218170166, "step": 2218 }, { "epoch": 6.075290896646133, "grad_norm": 2.8393876552581787, "learning_rate": 6.960273972602739e-07, "log_odds_chosen": 2.552340507507324, "log_odds_ratio": -0.1937907636165619, "logits/chosen": 1.0411765575408936, "logits/rejected": 1.0855097770690918, "logps/chosen": -2.255877733230591, "logps/rejected": -4.6569976806640625, "loss": 0.7284, "nll_loss": 0.7090537548065186, "rewards/accuracies": 0.875, "rewards/chosen": -0.22558778524398804, "rewards/margins": 0.24011199176311493, "rewards/rejected": -0.4656997621059418, "step": 2219 }, { "epoch": 6.078028747433265, "grad_norm": 3.189988374710083, "learning_rate": 6.958904109589041e-07, "log_odds_chosen": 0.7960783839225769, "log_odds_ratio": -0.40658730268478394, "logits/chosen": 0.695489764213562, "logits/rejected": 0.7257521748542786, "logps/chosen": -2.3910224437713623, "logps/rejected": -3.131420612335205, "loss": 0.8013, "nll_loss": 0.7606227397918701, "rewards/accuracies": 0.875, "rewards/chosen": -0.23910224437713623, "rewards/margins": 0.07403983175754547, "rewards/rejected": -0.3131420612335205, "step": 2220 }, { "epoch": 6.080766598220397, "grad_norm": 2.4158620834350586, "learning_rate": 6.957534246575342e-07, "log_odds_chosen": 1.9603312015533447, "log_odds_ratio": -0.23861074447631836, "logits/chosen": 0.8492065072059631, "logits/rejected": 0.8367191553115845, "logps/chosen": -1.6347689628601074, "logps/rejected": -3.4046199321746826, "loss": 0.6587, "nll_loss": 0.6348350644111633, "rewards/accuracies": 0.875, "rewards/chosen": -0.16347689926624298, "rewards/margins": 0.17698511481285095, "rewards/rejected": -0.34046199917793274, "step": 2221 }, { "epoch": 6.083504449007529, "grad_norm": 4.186294078826904, "learning_rate": 6.956164383561643e-07, "log_odds_chosen": 2.5011239051818848, "log_odds_ratio": -0.20696118474006653, "logits/chosen": 1.0398892164230347, "logits/rejected": 1.0318856239318848, "logps/chosen": -2.3673815727233887, "logps/rejected": -4.738452434539795, "loss": 0.7307, "nll_loss": 0.7099939584732056, "rewards/accuracies": 1.0, "rewards/chosen": -0.2367381602525711, "rewards/margins": 0.23710709810256958, "rewards/rejected": -0.4738452434539795, "step": 2222 }, { "epoch": 6.0862422997946615, "grad_norm": 2.8779702186584473, "learning_rate": 6.954794520547945e-07, "log_odds_chosen": 3.085829019546509, "log_odds_ratio": -0.27715545892715454, "logits/chosen": 0.8566219210624695, "logits/rejected": 0.8819189071655273, "logps/chosen": -2.450953245162964, "logps/rejected": -5.438264846801758, "loss": 0.6759, "nll_loss": 0.6482197046279907, "rewards/accuracies": 0.875, "rewards/chosen": -0.24509532749652863, "rewards/margins": 0.2987312078475952, "rewards/rejected": -0.5438265204429626, "step": 2223 }, { "epoch": 6.0889801505817935, "grad_norm": 4.96729850769043, "learning_rate": 6.953424657534246e-07, "log_odds_chosen": 0.7115729451179504, "log_odds_ratio": -0.4838559627532959, "logits/chosen": 0.5610027313232422, "logits/rejected": 0.5419033765792847, "logps/chosen": -1.9896748065948486, "logps/rejected": -2.5473289489746094, "loss": 0.7384, "nll_loss": 0.6899957060813904, "rewards/accuracies": 0.875, "rewards/chosen": -0.19896748661994934, "rewards/margins": 0.05576540157198906, "rewards/rejected": -0.2547328770160675, "step": 2224 }, { "epoch": 6.091718001368926, "grad_norm": 2.5038466453552246, "learning_rate": 6.952054794520548e-07, "log_odds_chosen": 4.677512168884277, "log_odds_ratio": -0.11106289178133011, "logits/chosen": 0.8740187883377075, "logits/rejected": 0.880913496017456, "logps/chosen": -1.896134853363037, "logps/rejected": -6.384815216064453, "loss": 0.7409, "nll_loss": 0.7298070192337036, "rewards/accuracies": 0.875, "rewards/chosen": -0.1896134912967682, "rewards/margins": 0.4488680958747864, "rewards/rejected": -0.638481616973877, "step": 2225 }, { "epoch": 6.094455852156058, "grad_norm": 2.9357032775878906, "learning_rate": 6.950684931506849e-07, "log_odds_chosen": 2.550687551498413, "log_odds_ratio": -0.3714297115802765, "logits/chosen": 1.0615103244781494, "logits/rejected": 1.0824803113937378, "logps/chosen": -1.9097180366516113, "logps/rejected": -4.352174758911133, "loss": 0.6861, "nll_loss": 0.648973822593689, "rewards/accuracies": 0.625, "rewards/chosen": -0.19097179174423218, "rewards/margins": 0.24424567818641663, "rewards/rejected": -0.4352174699306488, "step": 2226 }, { "epoch": 6.09719370294319, "grad_norm": 2.520475149154663, "learning_rate": 6.94931506849315e-07, "log_odds_chosen": 2.537806987762451, "log_odds_ratio": -0.20632678270339966, "logits/chosen": 1.0291661024093628, "logits/rejected": 1.0378042459487915, "logps/chosen": -3.0221877098083496, "logps/rejected": -5.4181437492370605, "loss": 0.7543, "nll_loss": 0.7337034940719604, "rewards/accuracies": 0.875, "rewards/chosen": -0.3022187650203705, "rewards/margins": 0.23959562182426453, "rewards/rejected": -0.541814386844635, "step": 2227 }, { "epoch": 6.099931553730322, "grad_norm": 2.380218029022217, "learning_rate": 6.947945205479452e-07, "log_odds_chosen": 3.291203737258911, "log_odds_ratio": -0.22819319367408752, "logits/chosen": 0.8388914465904236, "logits/rejected": 0.8277227878570557, "logps/chosen": -1.9166899919509888, "logps/rejected": -4.972862243652344, "loss": 0.7155, "nll_loss": 0.6927170753479004, "rewards/accuracies": 1.0, "rewards/chosen": -0.19166898727416992, "rewards/margins": 0.30561724305152893, "rewards/rejected": -0.49728626012802124, "step": 2228 }, { "epoch": 6.102669404517454, "grad_norm": 2.3030786514282227, "learning_rate": 6.946575342465753e-07, "log_odds_chosen": 2.9328250885009766, "log_odds_ratio": -0.19177326560020447, "logits/chosen": 0.8675754070281982, "logits/rejected": 0.8360145688056946, "logps/chosen": -2.355498790740967, "logps/rejected": -5.187465190887451, "loss": 0.7517, "nll_loss": 0.7325153946876526, "rewards/accuracies": 1.0, "rewards/chosen": -0.2355498969554901, "rewards/margins": 0.2831966280937195, "rewards/rejected": -0.5187464952468872, "step": 2229 }, { "epoch": 6.105407255304586, "grad_norm": 2.513037919998169, "learning_rate": 6.945205479452054e-07, "log_odds_chosen": 2.0071513652801514, "log_odds_ratio": -0.23790468275547028, "logits/chosen": 0.8274053335189819, "logits/rejected": 0.8504976034164429, "logps/chosen": -1.6864386796951294, "logps/rejected": -3.508617401123047, "loss": 0.6676, "nll_loss": 0.6437857151031494, "rewards/accuracies": 0.875, "rewards/chosen": -0.16864386200904846, "rewards/margins": 0.18221788108348846, "rewards/rejected": -0.3508617877960205, "step": 2230 }, { "epoch": 6.108145106091718, "grad_norm": 2.7181499004364014, "learning_rate": 6.943835616438356e-07, "log_odds_chosen": 3.960731029510498, "log_odds_ratio": -0.2668440341949463, "logits/chosen": 0.7688441276550293, "logits/rejected": 0.7557736039161682, "logps/chosen": -2.392889976501465, "logps/rejected": -6.278139114379883, "loss": 0.8732, "nll_loss": 0.8465231657028198, "rewards/accuracies": 0.75, "rewards/chosen": -0.23928900063037872, "rewards/margins": 0.38852497935295105, "rewards/rejected": -0.6278139352798462, "step": 2231 }, { "epoch": 6.11088295687885, "grad_norm": 2.710540771484375, "learning_rate": 6.942465753424657e-07, "log_odds_chosen": 3.932598114013672, "log_odds_ratio": -0.12382515519857407, "logits/chosen": 1.0484671592712402, "logits/rejected": 1.0677719116210938, "logps/chosen": -2.2715940475463867, "logps/rejected": -6.10213565826416, "loss": 0.7515, "nll_loss": 0.7391490936279297, "rewards/accuracies": 1.0, "rewards/chosen": -0.22715938091278076, "rewards/margins": 0.3830541670322418, "rewards/rejected": -0.610213577747345, "step": 2232 }, { "epoch": 6.113620807665982, "grad_norm": 2.62667179107666, "learning_rate": 6.941095890410958e-07, "log_odds_chosen": 2.1526453495025635, "log_odds_ratio": -0.18834710121154785, "logits/chosen": 0.8891427516937256, "logits/rejected": 0.8195099830627441, "logps/chosen": -1.9605097770690918, "logps/rejected": -3.9362926483154297, "loss": 0.6719, "nll_loss": 0.6531130075454712, "rewards/accuracies": 1.0, "rewards/chosen": -0.1960509866476059, "rewards/margins": 0.1975782811641693, "rewards/rejected": -0.393629252910614, "step": 2233 }, { "epoch": 6.116358658453114, "grad_norm": 4.260881423950195, "learning_rate": 6.93972602739726e-07, "log_odds_chosen": 2.842047929763794, "log_odds_ratio": -0.24132905900478363, "logits/chosen": 0.9801967144012451, "logits/rejected": 1.0139143466949463, "logps/chosen": -2.2538578510284424, "logps/rejected": -4.876614570617676, "loss": 0.7826, "nll_loss": 0.7584385871887207, "rewards/accuracies": 0.875, "rewards/chosen": -0.22538580000400543, "rewards/margins": 0.26227566599845886, "rewards/rejected": -0.4876614511013031, "step": 2234 }, { "epoch": 6.119096509240246, "grad_norm": 3.1843981742858887, "learning_rate": 6.938356164383561e-07, "log_odds_chosen": 1.8592829704284668, "log_odds_ratio": -0.32133162021636963, "logits/chosen": 0.9214632511138916, "logits/rejected": 0.8295700550079346, "logps/chosen": -1.9295060634613037, "logps/rejected": -3.5720293521881104, "loss": 0.7093, "nll_loss": 0.6771726608276367, "rewards/accuracies": 1.0, "rewards/chosen": -0.19295062124729156, "rewards/margins": 0.16425231099128723, "rewards/rejected": -0.3572029173374176, "step": 2235 }, { "epoch": 6.121834360027378, "grad_norm": 2.3958654403686523, "learning_rate": 6.936986301369862e-07, "log_odds_chosen": 1.7622385025024414, "log_odds_ratio": -0.3303492069244385, "logits/chosen": 0.6774168014526367, "logits/rejected": 0.6232123374938965, "logps/chosen": -1.8069026470184326, "logps/rejected": -3.471696138381958, "loss": 0.7468, "nll_loss": 0.7137489318847656, "rewards/accuracies": 0.75, "rewards/chosen": -0.18069025874137878, "rewards/margins": 0.16647934913635254, "rewards/rejected": -0.3471696078777313, "step": 2236 }, { "epoch": 6.12457221081451, "grad_norm": 3.2802438735961914, "learning_rate": 6.935616438356164e-07, "log_odds_chosen": 0.7684113383293152, "log_odds_ratio": -0.5049877166748047, "logits/chosen": 0.7966010570526123, "logits/rejected": 0.7172530293464661, "logps/chosen": -2.067389488220215, "logps/rejected": -2.7530744075775146, "loss": 0.715, "nll_loss": 0.6645261645317078, "rewards/accuracies": 0.75, "rewards/chosen": -0.2067389190196991, "rewards/margins": 0.06856850534677505, "rewards/rejected": -0.27530744671821594, "step": 2237 }, { "epoch": 6.1273100616016425, "grad_norm": 2.8016695976257324, "learning_rate": 6.934246575342465e-07, "log_odds_chosen": 1.283940076828003, "log_odds_ratio": -0.4104180932044983, "logits/chosen": 0.6578903794288635, "logits/rejected": 0.6399812698364258, "logps/chosen": -1.533793330192566, "logps/rejected": -2.6413681507110596, "loss": 0.7674, "nll_loss": 0.7263476252555847, "rewards/accuracies": 0.75, "rewards/chosen": -0.15337933599948883, "rewards/margins": 0.1107574999332428, "rewards/rejected": -0.26413682103157043, "step": 2238 }, { "epoch": 6.1300479123887746, "grad_norm": 3.4336278438568115, "learning_rate": 6.932876712328767e-07, "log_odds_chosen": 1.5997564792633057, "log_odds_ratio": -0.3357308506965637, "logits/chosen": 0.6133447885513306, "logits/rejected": 0.5891743898391724, "logps/chosen": -2.603168249130249, "logps/rejected": -4.115633010864258, "loss": 0.7515, "nll_loss": 0.7179478406906128, "rewards/accuracies": 0.875, "rewards/chosen": -0.2603168189525604, "rewards/margins": 0.15124648809432983, "rewards/rejected": -0.41156327724456787, "step": 2239 }, { "epoch": 6.132785763175907, "grad_norm": 2.8491740226745605, "learning_rate": 6.931506849315068e-07, "log_odds_chosen": 2.9569106101989746, "log_odds_ratio": -0.2458946704864502, "logits/chosen": 1.0068894624710083, "logits/rejected": 0.9559731483459473, "logps/chosen": -1.7277933359146118, "logps/rejected": -4.557104110717773, "loss": 0.7765, "nll_loss": 0.7518700957298279, "rewards/accuracies": 1.0, "rewards/chosen": -0.17277935147285461, "rewards/margins": 0.28293102979660034, "rewards/rejected": -0.45571038126945496, "step": 2240 }, { "epoch": 6.135523613963039, "grad_norm": 2.635868549346924, "learning_rate": 6.930136986301369e-07, "log_odds_chosen": 1.7409849166870117, "log_odds_ratio": -0.2748623192310333, "logits/chosen": 0.6872857809066772, "logits/rejected": 0.6237074136734009, "logps/chosen": -1.691673994064331, "logps/rejected": -3.2824645042419434, "loss": 0.7364, "nll_loss": 0.7089599370956421, "rewards/accuracies": 1.0, "rewards/chosen": -0.1691673994064331, "rewards/margins": 0.15907905995845795, "rewards/rejected": -0.32824644446372986, "step": 2241 }, { "epoch": 6.138261464750171, "grad_norm": 3.2995986938476562, "learning_rate": 6.928767123287671e-07, "log_odds_chosen": 1.439131259918213, "log_odds_ratio": -0.4477359652519226, "logits/chosen": 0.9635218977928162, "logits/rejected": 0.9204506874084473, "logps/chosen": -2.186976909637451, "logps/rejected": -3.540466070175171, "loss": 0.7989, "nll_loss": 0.7540832757949829, "rewards/accuracies": 0.75, "rewards/chosen": -0.2186976969242096, "rewards/margins": 0.13534891605377197, "rewards/rejected": -0.35404661297798157, "step": 2242 }, { "epoch": 6.140999315537303, "grad_norm": 3.124659299850464, "learning_rate": 6.927397260273972e-07, "log_odds_chosen": 1.458441138267517, "log_odds_ratio": -0.41656702756881714, "logits/chosen": 0.6252095103263855, "logits/rejected": 0.5930358171463013, "logps/chosen": -1.7916629314422607, "logps/rejected": -3.0692381858825684, "loss": 0.7695, "nll_loss": 0.7278164029121399, "rewards/accuracies": 0.875, "rewards/chosen": -0.1791662871837616, "rewards/margins": 0.12775751948356628, "rewards/rejected": -0.3069238066673279, "step": 2243 }, { "epoch": 6.143737166324435, "grad_norm": 2.9257888793945312, "learning_rate": 6.926027397260273e-07, "log_odds_chosen": 2.2776668071746826, "log_odds_ratio": -0.18168649077415466, "logits/chosen": 0.8997455835342407, "logits/rejected": 0.9078491926193237, "logps/chosen": -1.7084908485412598, "logps/rejected": -3.8275322914123535, "loss": 0.7051, "nll_loss": 0.6869382858276367, "rewards/accuracies": 1.0, "rewards/chosen": -0.17084908485412598, "rewards/margins": 0.2119041383266449, "rewards/rejected": -0.3827532231807709, "step": 2244 }, { "epoch": 6.146475017111568, "grad_norm": 6.490832805633545, "learning_rate": 6.924657534246575e-07, "log_odds_chosen": -0.13075226545333862, "log_odds_ratio": -1.144407868385315, "logits/chosen": 0.7160053253173828, "logits/rejected": 0.6600934267044067, "logps/chosen": -2.5626981258392334, "logps/rejected": -2.385695219039917, "loss": 0.8153, "nll_loss": 0.7008453607559204, "rewards/accuracies": 0.625, "rewards/chosen": -0.25626981258392334, "rewards/margins": -0.01770028844475746, "rewards/rejected": -0.23856951296329498, "step": 2245 }, { "epoch": 6.1492128678987, "grad_norm": 2.7625105381011963, "learning_rate": 6.923287671232877e-07, "log_odds_chosen": 1.9674623012542725, "log_odds_ratio": -0.23632623255252838, "logits/chosen": 0.8331730961799622, "logits/rejected": 0.8671615719795227, "logps/chosen": -2.0685083866119385, "logps/rejected": -3.8578920364379883, "loss": 0.6756, "nll_loss": 0.6519637107849121, "rewards/accuracies": 1.0, "rewards/chosen": -0.2068508267402649, "rewards/margins": 0.1789383590221405, "rewards/rejected": -0.3857891857624054, "step": 2246 }, { "epoch": 6.151950718685832, "grad_norm": 3.058253765106201, "learning_rate": 6.921917808219177e-07, "log_odds_chosen": 2.306025505065918, "log_odds_ratio": -0.20710670948028564, "logits/chosen": 0.5549498200416565, "logits/rejected": 0.5652161836624146, "logps/chosen": -2.189295768737793, "logps/rejected": -4.374053001403809, "loss": 0.9082, "nll_loss": 0.887487530708313, "rewards/accuracies": 1.0, "rewards/chosen": -0.21892957389354706, "rewards/margins": 0.21847572922706604, "rewards/rejected": -0.4374052882194519, "step": 2247 }, { "epoch": 6.154688569472964, "grad_norm": 2.444263219833374, "learning_rate": 6.920547945205479e-07, "log_odds_chosen": 1.5707049369812012, "log_odds_ratio": -0.3621750771999359, "logits/chosen": 0.7406494617462158, "logits/rejected": 0.6961897015571594, "logps/chosen": -1.6257591247558594, "logps/rejected": -3.024169445037842, "loss": 0.7017, "nll_loss": 0.6654716730117798, "rewards/accuracies": 0.875, "rewards/chosen": -0.16257593035697937, "rewards/margins": 0.1398410201072693, "rewards/rejected": -0.30241692066192627, "step": 2248 }, { "epoch": 6.157426420260096, "grad_norm": 2.9729671478271484, "learning_rate": 6.91917808219178e-07, "log_odds_chosen": 2.9224703311920166, "log_odds_ratio": -0.15070541203022003, "logits/chosen": 1.1273658275604248, "logits/rejected": 1.151368260383606, "logps/chosen": -2.5070290565490723, "logps/rejected": -5.296784400939941, "loss": 0.6983, "nll_loss": 0.6832761168479919, "rewards/accuracies": 1.0, "rewards/chosen": -0.25070294737815857, "rewards/margins": 0.2789754867553711, "rewards/rejected": -0.5296784043312073, "step": 2249 }, { "epoch": 6.160164271047228, "grad_norm": 2.5084304809570312, "learning_rate": 6.917808219178081e-07, "log_odds_chosen": 2.078090190887451, "log_odds_ratio": -0.299424946308136, "logits/chosen": 0.5566482543945312, "logits/rejected": 0.616125226020813, "logps/chosen": -1.5301682949066162, "logps/rejected": -3.427320718765259, "loss": 0.7483, "nll_loss": 0.7183495759963989, "rewards/accuracies": 0.875, "rewards/chosen": -0.1530168354511261, "rewards/margins": 0.18971523642539978, "rewards/rejected": -0.3427320718765259, "step": 2250 }, { "epoch": 6.16290212183436, "grad_norm": 2.704511880874634, "learning_rate": 6.916438356164383e-07, "log_odds_chosen": 3.113649368286133, "log_odds_ratio": -0.19008669257164001, "logits/chosen": 1.2478817701339722, "logits/rejected": 1.2545957565307617, "logps/chosen": -1.861433506011963, "logps/rejected": -4.82755708694458, "loss": 0.681, "nll_loss": 0.6620134115219116, "rewards/accuracies": 0.875, "rewards/chosen": -0.18614336848258972, "rewards/margins": 0.29661235213279724, "rewards/rejected": -0.48275572061538696, "step": 2251 }, { "epoch": 6.165639972621492, "grad_norm": 2.751455783843994, "learning_rate": 6.915068493150684e-07, "log_odds_chosen": 1.6692566871643066, "log_odds_ratio": -0.32768821716308594, "logits/chosen": 0.8573225736618042, "logits/rejected": 0.8067899942398071, "logps/chosen": -2.0183160305023193, "logps/rejected": -3.554013252258301, "loss": 0.7115, "nll_loss": 0.6787376403808594, "rewards/accuracies": 0.75, "rewards/chosen": -0.2018316090106964, "rewards/margins": 0.15356969833374023, "rewards/rejected": -0.35540133714675903, "step": 2252 }, { "epoch": 6.168377823408624, "grad_norm": 2.9317626953125, "learning_rate": 6.913698630136985e-07, "log_odds_chosen": 3.18855357170105, "log_odds_ratio": -0.1518097221851349, "logits/chosen": 1.231724739074707, "logits/rejected": 1.2533760070800781, "logps/chosen": -2.8104543685913086, "logps/rejected": -5.867713928222656, "loss": 0.7476, "nll_loss": 0.7324150204658508, "rewards/accuracies": 0.875, "rewards/chosen": -0.28104543685913086, "rewards/margins": 0.30572593212127686, "rewards/rejected": -0.5867713689804077, "step": 2253 }, { "epoch": 6.1711156741957565, "grad_norm": 2.3362886905670166, "learning_rate": 6.912328767123287e-07, "log_odds_chosen": 3.361065149307251, "log_odds_ratio": -0.09049499034881592, "logits/chosen": 0.7762613892555237, "logits/rejected": 0.7813838720321655, "logps/chosen": -1.6540424823760986, "logps/rejected": -4.756836891174316, "loss": 0.7578, "nll_loss": 0.7487127780914307, "rewards/accuracies": 1.0, "rewards/chosen": -0.16540426015853882, "rewards/margins": 0.31027939915657043, "rewards/rejected": -0.47568368911743164, "step": 2254 }, { "epoch": 6.1738535249828885, "grad_norm": 4.035234451293945, "learning_rate": 6.910958904109588e-07, "log_odds_chosen": 0.6598626375198364, "log_odds_ratio": -0.7231520414352417, "logits/chosen": 1.0392729043960571, "logits/rejected": 1.018355369567871, "logps/chosen": -3.076223850250244, "logps/rejected": -3.685636043548584, "loss": 0.7657, "nll_loss": 0.6933630704879761, "rewards/accuracies": 0.625, "rewards/chosen": -0.30762237310409546, "rewards/margins": 0.06094121187925339, "rewards/rejected": -0.36856359243392944, "step": 2255 }, { "epoch": 6.176591375770021, "grad_norm": 2.8470938205718994, "learning_rate": 6.90958904109589e-07, "log_odds_chosen": 1.8256471157073975, "log_odds_ratio": -0.36723509430885315, "logits/chosen": 1.1103724241256714, "logits/rejected": 1.1284048557281494, "logps/chosen": -2.096794366836548, "logps/rejected": -3.798902988433838, "loss": 0.7428, "nll_loss": 0.7061174511909485, "rewards/accuracies": 0.875, "rewards/chosen": -0.20967943966388702, "rewards/margins": 0.17021086812019348, "rewards/rejected": -0.3798903226852417, "step": 2256 }, { "epoch": 6.179329226557153, "grad_norm": 2.9903225898742676, "learning_rate": 6.908219178082192e-07, "log_odds_chosen": 1.7751078605651855, "log_odds_ratio": -0.3802775740623474, "logits/chosen": 1.0066790580749512, "logits/rejected": 1.0096936225891113, "logps/chosen": -2.4835093021392822, "logps/rejected": -4.118063449859619, "loss": 0.7663, "nll_loss": 0.7282595634460449, "rewards/accuracies": 0.75, "rewards/chosen": -0.24835094809532166, "rewards/margins": 0.16345542669296265, "rewards/rejected": -0.4118063449859619, "step": 2257 }, { "epoch": 6.182067077344285, "grad_norm": 2.859477996826172, "learning_rate": 6.906849315068493e-07, "log_odds_chosen": 3.2363643646240234, "log_odds_ratio": -0.16179536283016205, "logits/chosen": 1.0599256753921509, "logits/rejected": 1.0701751708984375, "logps/chosen": -2.3316543102264404, "logps/rejected": -5.423973083496094, "loss": 0.6781, "nll_loss": 0.6619430184364319, "rewards/accuracies": 1.0, "rewards/chosen": -0.233165442943573, "rewards/margins": 0.30923187732696533, "rewards/rejected": -0.5423972606658936, "step": 2258 }, { "epoch": 6.184804928131417, "grad_norm": 3.5078673362731934, "learning_rate": 6.905479452054795e-07, "log_odds_chosen": 1.6298885345458984, "log_odds_ratio": -0.26951053738594055, "logits/chosen": 0.891955554485321, "logits/rejected": 0.8860573172569275, "logps/chosen": -2.198235511779785, "logps/rejected": -3.676720380783081, "loss": 0.6943, "nll_loss": 0.6673576235771179, "rewards/accuracies": 1.0, "rewards/chosen": -0.21982353925704956, "rewards/margins": 0.1478484869003296, "rewards/rejected": -0.36767202615737915, "step": 2259 }, { "epoch": 6.187542778918549, "grad_norm": 2.2853269577026367, "learning_rate": 6.904109589041097e-07, "log_odds_chosen": 2.9863996505737305, "log_odds_ratio": -0.17499101161956787, "logits/chosen": 0.8880684971809387, "logits/rejected": 0.8700425028800964, "logps/chosen": -2.5008344650268555, "logps/rejected": -5.3967156410217285, "loss": 0.6853, "nll_loss": 0.6678504943847656, "rewards/accuracies": 0.875, "rewards/chosen": -0.25008344650268555, "rewards/margins": 0.2895881235599518, "rewards/rejected": -0.5396715402603149, "step": 2260 }, { "epoch": 6.190280629705681, "grad_norm": 3.721144676208496, "learning_rate": 6.902739726027397e-07, "log_odds_chosen": 3.3655929565429688, "log_odds_ratio": -0.18805599212646484, "logits/chosen": 1.122916579246521, "logits/rejected": 1.134019136428833, "logps/chosen": -2.377859592437744, "logps/rejected": -5.614513874053955, "loss": 0.7365, "nll_loss": 0.7176545858383179, "rewards/accuracies": 0.875, "rewards/chosen": -0.2377859652042389, "rewards/margins": 0.3236654996871948, "rewards/rejected": -0.5614514350891113, "step": 2261 }, { "epoch": 6.193018480492813, "grad_norm": 2.6307268142700195, "learning_rate": 6.901369863013699e-07, "log_odds_chosen": 2.001432418823242, "log_odds_ratio": -0.2521349787712097, "logits/chosen": 1.0524406433105469, "logits/rejected": 1.0320090055465698, "logps/chosen": -1.8293418884277344, "logps/rejected": -3.6824862957000732, "loss": 0.6418, "nll_loss": 0.6166204810142517, "rewards/accuracies": 1.0, "rewards/chosen": -0.18293419480323792, "rewards/margins": 0.18531444668769836, "rewards/rejected": -0.3682486414909363, "step": 2262 }, { "epoch": 6.195756331279945, "grad_norm": 3.3352274894714355, "learning_rate": 6.9e-07, "log_odds_chosen": 1.0838834047317505, "log_odds_ratio": -0.37933793663978577, "logits/chosen": 0.4943930506706238, "logits/rejected": 0.48214244842529297, "logps/chosen": -2.384744882583618, "logps/rejected": -3.3709702491760254, "loss": 0.7091, "nll_loss": 0.6711558699607849, "rewards/accuracies": 0.75, "rewards/chosen": -0.23847448825836182, "rewards/margins": 0.09862251579761505, "rewards/rejected": -0.3370969891548157, "step": 2263 }, { "epoch": 6.198494182067077, "grad_norm": 3.3751883506774902, "learning_rate": 6.898630136986301e-07, "log_odds_chosen": 2.1808080673217773, "log_odds_ratio": -0.4983258843421936, "logits/chosen": 0.8486031293869019, "logits/rejected": 0.8195372819900513, "logps/chosen": -2.369847536087036, "logps/rejected": -4.4706830978393555, "loss": 0.7755, "nll_loss": 0.7256926894187927, "rewards/accuracies": 0.625, "rewards/chosen": -0.2369847446680069, "rewards/margins": 0.21008357405662537, "rewards/rejected": -0.44706833362579346, "step": 2264 }, { "epoch": 6.201232032854209, "grad_norm": 2.9724819660186768, "learning_rate": 6.897260273972603e-07, "log_odds_chosen": 3.3962886333465576, "log_odds_ratio": -0.05474710464477539, "logits/chosen": 0.9207008481025696, "logits/rejected": 0.8715194463729858, "logps/chosen": -2.211182117462158, "logps/rejected": -5.4618730545043945, "loss": 0.8271, "nll_loss": 0.8216290473937988, "rewards/accuracies": 1.0, "rewards/chosen": -0.22111821174621582, "rewards/margins": 0.3250690996646881, "rewards/rejected": -0.5461872816085815, "step": 2265 }, { "epoch": 6.203969883641341, "grad_norm": 2.640953302383423, "learning_rate": 6.895890410958904e-07, "log_odds_chosen": 1.688141942024231, "log_odds_ratio": -0.22721117734909058, "logits/chosen": 1.145066499710083, "logits/rejected": 1.1849037408828735, "logps/chosen": -2.2750096321105957, "logps/rejected": -3.8507869243621826, "loss": 0.7252, "nll_loss": 0.7024837136268616, "rewards/accuracies": 1.0, "rewards/chosen": -0.22750094532966614, "rewards/margins": 0.1575777232646942, "rewards/rejected": -0.38507866859436035, "step": 2266 }, { "epoch": 6.206707734428473, "grad_norm": 2.4920172691345215, "learning_rate": 6.894520547945205e-07, "log_odds_chosen": 3.1296048164367676, "log_odds_ratio": -0.18521222472190857, "logits/chosen": 0.9048349857330322, "logits/rejected": 0.8648936748504639, "logps/chosen": -1.6967957019805908, "logps/rejected": -4.629744052886963, "loss": 0.6469, "nll_loss": 0.6283739805221558, "rewards/accuracies": 1.0, "rewards/chosen": -0.16967958211898804, "rewards/margins": 0.29329484701156616, "rewards/rejected": -0.4629744291305542, "step": 2267 }, { "epoch": 6.209445585215605, "grad_norm": 3.311659812927246, "learning_rate": 6.893150684931507e-07, "log_odds_chosen": 1.539782166481018, "log_odds_ratio": -0.34962037205696106, "logits/chosen": 0.6877796053886414, "logits/rejected": 0.6446018218994141, "logps/chosen": -2.172914981842041, "logps/rejected": -3.599557399749756, "loss": 0.7893, "nll_loss": 0.7543450593948364, "rewards/accuracies": 0.875, "rewards/chosen": -0.21729150414466858, "rewards/margins": 0.14266425371170044, "rewards/rejected": -0.3599557876586914, "step": 2268 }, { "epoch": 6.2121834360027375, "grad_norm": 2.2061264514923096, "learning_rate": 6.891780821917808e-07, "log_odds_chosen": 2.1083083152770996, "log_odds_ratio": -0.24018236994743347, "logits/chosen": 0.8171175718307495, "logits/rejected": 0.7917612791061401, "logps/chosen": -1.8670635223388672, "logps/rejected": -3.829784870147705, "loss": 0.7301, "nll_loss": 0.7060568332672119, "rewards/accuracies": 1.0, "rewards/chosen": -0.18670636415481567, "rewards/margins": 0.1962721347808838, "rewards/rejected": -0.38297849893569946, "step": 2269 }, { "epoch": 6.2149212867898695, "grad_norm": 3.3893535137176514, "learning_rate": 6.89041095890411e-07, "log_odds_chosen": 0.7210841774940491, "log_odds_ratio": -0.5088762640953064, "logits/chosen": 0.7093889117240906, "logits/rejected": 0.7828195691108704, "logps/chosen": -2.2418932914733887, "logps/rejected": -2.8671417236328125, "loss": 0.7212, "nll_loss": 0.6703604459762573, "rewards/accuracies": 0.625, "rewards/chosen": -0.22418932616710663, "rewards/margins": 0.06252485513687134, "rewards/rejected": -0.2867141664028168, "step": 2270 }, { "epoch": 6.217659137577002, "grad_norm": 3.154844284057617, "learning_rate": 6.889041095890411e-07, "log_odds_chosen": 4.613955497741699, "log_odds_ratio": -0.2614350914955139, "logits/chosen": 0.7519054412841797, "logits/rejected": 0.666694164276123, "logps/chosen": -2.417890787124634, "logps/rejected": -6.87573766708374, "loss": 0.7552, "nll_loss": 0.7290147542953491, "rewards/accuracies": 0.875, "rewards/chosen": -0.2417891025543213, "rewards/margins": 0.44578471779823303, "rewards/rejected": -0.6875737905502319, "step": 2271 }, { "epoch": 6.220396988364135, "grad_norm": 5.752240180969238, "learning_rate": 6.887671232876712e-07, "log_odds_chosen": 2.088653802871704, "log_odds_ratio": -0.4264147877693176, "logits/chosen": 0.9510327577590942, "logits/rejected": 0.9083656668663025, "logps/chosen": -2.66774320602417, "logps/rejected": -4.654653072357178, "loss": 0.8226, "nll_loss": 0.7799590826034546, "rewards/accuracies": 0.625, "rewards/chosen": -0.26677435636520386, "rewards/margins": 0.1986910104751587, "rewards/rejected": -0.46546536684036255, "step": 2272 }, { "epoch": 6.223134839151267, "grad_norm": 3.3120808601379395, "learning_rate": 6.886301369863014e-07, "log_odds_chosen": 1.9492310285568237, "log_odds_ratio": -0.22333666682243347, "logits/chosen": 0.7591709494590759, "logits/rejected": 0.7747179269790649, "logps/chosen": -2.2244694232940674, "logps/rejected": -4.058551788330078, "loss": 0.6832, "nll_loss": 0.6608691215515137, "rewards/accuracies": 1.0, "rewards/chosen": -0.22244694828987122, "rewards/margins": 0.1834082305431366, "rewards/rejected": -0.4058551490306854, "step": 2273 }, { "epoch": 6.225872689938399, "grad_norm": 2.6333212852478027, "learning_rate": 6.884931506849315e-07, "log_odds_chosen": 2.3890299797058105, "log_odds_ratio": -0.20133256912231445, "logits/chosen": 0.9596173763275146, "logits/rejected": 0.9484187364578247, "logps/chosen": -1.6352665424346924, "logps/rejected": -3.8396191596984863, "loss": 0.687, "nll_loss": 0.666845977306366, "rewards/accuracies": 1.0, "rewards/chosen": -0.16352665424346924, "rewards/margins": 0.2204352468252182, "rewards/rejected": -0.38396191596984863, "step": 2274 }, { "epoch": 6.228610540725531, "grad_norm": 3.6631743907928467, "learning_rate": 6.883561643835616e-07, "log_odds_chosen": 2.3340866565704346, "log_odds_ratio": -0.4573836624622345, "logits/chosen": 0.9477233290672302, "logits/rejected": 0.9291483759880066, "logps/chosen": -2.6544785499572754, "logps/rejected": -4.902537822723389, "loss": 0.7438, "nll_loss": 0.6980361938476562, "rewards/accuracies": 0.75, "rewards/chosen": -0.26544785499572754, "rewards/margins": 0.22480596601963043, "rewards/rejected": -0.4902538061141968, "step": 2275 }, { "epoch": 6.231348391512663, "grad_norm": 2.7716386318206787, "learning_rate": 6.882191780821918e-07, "log_odds_chosen": 2.335848808288574, "log_odds_ratio": -0.22539068758487701, "logits/chosen": 0.886131763458252, "logits/rejected": 0.8942832350730896, "logps/chosen": -1.914390206336975, "logps/rejected": -4.111694812774658, "loss": 0.7162, "nll_loss": 0.6936534643173218, "rewards/accuracies": 1.0, "rewards/chosen": -0.19143903255462646, "rewards/margins": 0.2197304666042328, "rewards/rejected": -0.41116946935653687, "step": 2276 }, { "epoch": 6.234086242299795, "grad_norm": 2.3739845752716064, "learning_rate": 6.88082191780822e-07, "log_odds_chosen": 4.000176429748535, "log_odds_ratio": -0.11043740063905716, "logits/chosen": 1.118644118309021, "logits/rejected": 1.1487174034118652, "logps/chosen": -2.0048606395721436, "logps/rejected": -5.844433784484863, "loss": 0.7182, "nll_loss": 0.7071198225021362, "rewards/accuracies": 1.0, "rewards/chosen": -0.20048607885837555, "rewards/margins": 0.3839573264122009, "rewards/rejected": -0.5844434499740601, "step": 2277 }, { "epoch": 6.236824093086927, "grad_norm": 2.802605152130127, "learning_rate": 6.87945205479452e-07, "log_odds_chosen": 2.2823541164398193, "log_odds_ratio": -0.19731158018112183, "logits/chosen": 0.7597243189811707, "logits/rejected": 0.7753682136535645, "logps/chosen": -1.6769335269927979, "logps/rejected": -3.738546371459961, "loss": 0.6735, "nll_loss": 0.6537405252456665, "rewards/accuracies": 1.0, "rewards/chosen": -0.1676933616399765, "rewards/margins": 0.20616132020950317, "rewards/rejected": -0.3738546669483185, "step": 2278 }, { "epoch": 6.239561943874059, "grad_norm": 2.8231725692749023, "learning_rate": 6.878082191780822e-07, "log_odds_chosen": 2.4015142917633057, "log_odds_ratio": -0.24423028528690338, "logits/chosen": 0.7476966381072998, "logits/rejected": 0.7163123488426208, "logps/chosen": -2.2346742153167725, "logps/rejected": -4.536835670471191, "loss": 0.7706, "nll_loss": 0.7462025284767151, "rewards/accuracies": 0.875, "rewards/chosen": -0.2234674096107483, "rewards/margins": 0.23021617531776428, "rewards/rejected": -0.45368361473083496, "step": 2279 }, { "epoch": 6.242299794661191, "grad_norm": 3.14947772026062, "learning_rate": 6.876712328767123e-07, "log_odds_chosen": 1.6784279346466064, "log_odds_ratio": -0.3845483362674713, "logits/chosen": 0.8032419681549072, "logits/rejected": 0.8064735531806946, "logps/chosen": -2.42482590675354, "logps/rejected": -4.042302131652832, "loss": 0.7702, "nll_loss": 0.7317134141921997, "rewards/accuracies": 0.875, "rewards/chosen": -0.24248260259628296, "rewards/margins": 0.16174761950969696, "rewards/rejected": -0.4042302370071411, "step": 2280 }, { "epoch": 6.245037645448323, "grad_norm": 4.588074207305908, "learning_rate": 6.875342465753424e-07, "log_odds_chosen": 2.1555287837982178, "log_odds_ratio": -0.2479916214942932, "logits/chosen": 0.9278093576431274, "logits/rejected": 0.9035706520080566, "logps/chosen": -2.1123127937316895, "logps/rejected": -4.143575668334961, "loss": 0.7555, "nll_loss": 0.7307342886924744, "rewards/accuracies": 0.875, "rewards/chosen": -0.2112312763929367, "rewards/margins": 0.20312628149986267, "rewards/rejected": -0.41435757279396057, "step": 2281 }, { "epoch": 6.247775496235455, "grad_norm": 2.645718574523926, "learning_rate": 6.873972602739726e-07, "log_odds_chosen": 1.5307116508483887, "log_odds_ratio": -0.2970832288265228, "logits/chosen": 0.9615881443023682, "logits/rejected": 0.9550105333328247, "logps/chosen": -2.14310622215271, "logps/rejected": -3.5799989700317383, "loss": 0.7059, "nll_loss": 0.6761792898178101, "rewards/accuracies": 1.0, "rewards/chosen": -0.2143106311559677, "rewards/margins": 0.14368927478790283, "rewards/rejected": -0.35799989104270935, "step": 2282 }, { "epoch": 6.250513347022587, "grad_norm": 2.538053274154663, "learning_rate": 6.872602739726027e-07, "log_odds_chosen": 4.9045000076293945, "log_odds_ratio": -0.12282317876815796, "logits/chosen": 0.9180744290351868, "logits/rejected": 0.9276795387268066, "logps/chosen": -2.16978120803833, "logps/rejected": -6.942805290222168, "loss": 0.7272, "nll_loss": 0.7149187922477722, "rewards/accuracies": 0.875, "rewards/chosen": -0.21697811782360077, "rewards/margins": 0.4773024320602417, "rewards/rejected": -0.6942805647850037, "step": 2283 }, { "epoch": 6.253251197809719, "grad_norm": 4.551022052764893, "learning_rate": 6.871232876712329e-07, "log_odds_chosen": 1.5323735475540161, "log_odds_ratio": -0.5707528591156006, "logits/chosen": 0.8801594972610474, "logits/rejected": 0.7545703053474426, "logps/chosen": -2.5137736797332764, "logps/rejected": -3.9129180908203125, "loss": 0.8944, "nll_loss": 0.837304949760437, "rewards/accuracies": 0.875, "rewards/chosen": -0.2513773739337921, "rewards/margins": 0.13991443812847137, "rewards/rejected": -0.3912917971611023, "step": 2284 }, { "epoch": 6.2559890485968515, "grad_norm": 3.0455970764160156, "learning_rate": 6.86986301369863e-07, "log_odds_chosen": 0.9384739398956299, "log_odds_ratio": -0.4515596926212311, "logits/chosen": 0.8692582249641418, "logits/rejected": 0.8255742192268372, "logps/chosen": -2.2607340812683105, "logps/rejected": -3.1273977756500244, "loss": 0.7596, "nll_loss": 0.7144800424575806, "rewards/accuracies": 0.75, "rewards/chosen": -0.22607342898845673, "rewards/margins": 0.08666636049747467, "rewards/rejected": -0.3127397894859314, "step": 2285 }, { "epoch": 6.2587268993839835, "grad_norm": 6.457612037658691, "learning_rate": 6.868493150684931e-07, "log_odds_chosen": 2.8091864585876465, "log_odds_ratio": -0.527545154094696, "logits/chosen": 1.1537489891052246, "logits/rejected": 1.1598676443099976, "logps/chosen": -3.1997504234313965, "logps/rejected": -5.848048686981201, "loss": 0.9215, "nll_loss": 0.8687371015548706, "rewards/accuracies": 0.875, "rewards/chosen": -0.3199750483036041, "rewards/margins": 0.2648298442363739, "rewards/rejected": -0.584804892539978, "step": 2286 }, { "epoch": 6.261464750171116, "grad_norm": 2.8371496200561523, "learning_rate": 6.867123287671233e-07, "log_odds_chosen": 2.3946244716644287, "log_odds_ratio": -0.30741626024246216, "logits/chosen": 0.9176433682441711, "logits/rejected": 0.850826621055603, "logps/chosen": -1.8731361627578735, "logps/rejected": -4.083715915679932, "loss": 0.7155, "nll_loss": 0.6847474575042725, "rewards/accuracies": 0.875, "rewards/chosen": -0.18731361627578735, "rewards/margins": 0.2210579663515091, "rewards/rejected": -0.40837159752845764, "step": 2287 }, { "epoch": 6.264202600958248, "grad_norm": 2.653015613555908, "learning_rate": 6.865753424657534e-07, "log_odds_chosen": 3.7590017318725586, "log_odds_ratio": -0.14022065699100494, "logits/chosen": 0.9484294056892395, "logits/rejected": 0.9877538084983826, "logps/chosen": -2.7434115409851074, "logps/rejected": -6.436131477355957, "loss": 0.7418, "nll_loss": 0.7278041839599609, "rewards/accuracies": 1.0, "rewards/chosen": -0.2743411660194397, "rewards/margins": 0.36927202343940735, "rewards/rejected": -0.6436132192611694, "step": 2288 }, { "epoch": 6.26694045174538, "grad_norm": 2.5654423236846924, "learning_rate": 6.864383561643835e-07, "log_odds_chosen": 2.5708587169647217, "log_odds_ratio": -0.20231342315673828, "logits/chosen": 0.8557375073432922, "logits/rejected": 0.8303905725479126, "logps/chosen": -1.7780447006225586, "logps/rejected": -4.157819747924805, "loss": 0.7042, "nll_loss": 0.6839441657066345, "rewards/accuracies": 1.0, "rewards/chosen": -0.17780447006225586, "rewards/margins": 0.23797748982906342, "rewards/rejected": -0.41578197479248047, "step": 2289 }, { "epoch": 6.269678302532512, "grad_norm": 2.706070899963379, "learning_rate": 6.863013698630137e-07, "log_odds_chosen": 1.903260588645935, "log_odds_ratio": -0.27005183696746826, "logits/chosen": 0.6837860345840454, "logits/rejected": 0.6702237129211426, "logps/chosen": -1.873856544494629, "logps/rejected": -3.62408447265625, "loss": 0.6933, "nll_loss": 0.6663235425949097, "rewards/accuracies": 1.0, "rewards/chosen": -0.1873856484889984, "rewards/margins": 0.17502276599407196, "rewards/rejected": -0.3624083995819092, "step": 2290 }, { "epoch": 6.272416153319644, "grad_norm": 2.7141926288604736, "learning_rate": 6.861643835616439e-07, "log_odds_chosen": 2.5817079544067383, "log_odds_ratio": -0.19621792435646057, "logits/chosen": 0.9776360988616943, "logits/rejected": 0.9761321544647217, "logps/chosen": -1.8329157829284668, "logps/rejected": -4.179248332977295, "loss": 0.7188, "nll_loss": 0.6992054581642151, "rewards/accuracies": 1.0, "rewards/chosen": -0.18329158425331116, "rewards/margins": 0.23463323712348938, "rewards/rejected": -0.41792482137680054, "step": 2291 }, { "epoch": 6.275154004106776, "grad_norm": 2.791198253631592, "learning_rate": 6.860273972602739e-07, "log_odds_chosen": 2.4136412143707275, "log_odds_ratio": -0.2416476011276245, "logits/chosen": 0.9177847504615784, "logits/rejected": 0.9224110245704651, "logps/chosen": -2.916046142578125, "logps/rejected": -5.2513885498046875, "loss": 0.7503, "nll_loss": 0.7261548638343811, "rewards/accuracies": 0.875, "rewards/chosen": -0.291604608297348, "rewards/margins": 0.23353423178195953, "rewards/rejected": -0.5251388549804688, "step": 2292 }, { "epoch": 6.277891854893908, "grad_norm": 2.8008012771606445, "learning_rate": 6.858904109589041e-07, "log_odds_chosen": 1.7578743696212769, "log_odds_ratio": -0.3529176115989685, "logits/chosen": 0.6124217510223389, "logits/rejected": 0.5872272849082947, "logps/chosen": -1.7336680889129639, "logps/rejected": -3.3900370597839355, "loss": 0.717, "nll_loss": 0.6817574501037598, "rewards/accuracies": 0.75, "rewards/chosen": -0.17336681485176086, "rewards/margins": 0.16563689708709717, "rewards/rejected": -0.33900371193885803, "step": 2293 }, { "epoch": 6.28062970568104, "grad_norm": 3.342052698135376, "learning_rate": 6.857534246575342e-07, "log_odds_chosen": 1.979013442993164, "log_odds_ratio": -0.2511281669139862, "logits/chosen": 0.9028401374816895, "logits/rejected": 0.8817537426948547, "logps/chosen": -2.426121234893799, "logps/rejected": -4.301385879516602, "loss": 0.826, "nll_loss": 0.8008909821510315, "rewards/accuracies": 0.875, "rewards/chosen": -0.24261212348937988, "rewards/margins": 0.18752649426460266, "rewards/rejected": -0.43013858795166016, "step": 2294 }, { "epoch": 6.283367556468172, "grad_norm": 2.7961976528167725, "learning_rate": 6.856164383561643e-07, "log_odds_chosen": 2.3697972297668457, "log_odds_ratio": -0.43307581543922424, "logits/chosen": 0.8486621379852295, "logits/rejected": 0.8189390897750854, "logps/chosen": -1.9295077323913574, "logps/rejected": -4.140743255615234, "loss": 0.7708, "nll_loss": 0.7274439334869385, "rewards/accuracies": 0.875, "rewards/chosen": -0.1929507702589035, "rewards/margins": 0.22112354636192322, "rewards/rejected": -0.4140743315219879, "step": 2295 }, { "epoch": 6.286105407255304, "grad_norm": 2.319443464279175, "learning_rate": 6.854794520547945e-07, "log_odds_chosen": 2.1715316772460938, "log_odds_ratio": -0.27054721117019653, "logits/chosen": 1.090771198272705, "logits/rejected": 1.0215861797332764, "logps/chosen": -1.6217730045318604, "logps/rejected": -3.6502115726470947, "loss": 0.6974, "nll_loss": 0.6703616976737976, "rewards/accuracies": 1.0, "rewards/chosen": -0.16217729449272156, "rewards/margins": 0.20284388959407806, "rewards/rejected": -0.3650211691856384, "step": 2296 }, { "epoch": 6.288843258042437, "grad_norm": 2.459578275680542, "learning_rate": 6.853424657534246e-07, "log_odds_chosen": 3.3081917762756348, "log_odds_ratio": -0.1811310201883316, "logits/chosen": 0.6961829662322998, "logits/rejected": 0.6657987833023071, "logps/chosen": -2.2173538208007812, "logps/rejected": -5.425610065460205, "loss": 0.7767, "nll_loss": 0.758540689945221, "rewards/accuracies": 1.0, "rewards/chosen": -0.2217353880405426, "rewards/margins": 0.3208256959915161, "rewards/rejected": -0.5425610542297363, "step": 2297 }, { "epoch": 6.291581108829568, "grad_norm": 4.253796100616455, "learning_rate": 6.852054794520548e-07, "log_odds_chosen": 0.40872496366500854, "log_odds_ratio": -0.65165114402771, "logits/chosen": 0.6334126591682434, "logits/rejected": 0.6482817530632019, "logps/chosen": -2.409637451171875, "logps/rejected": -2.780416965484619, "loss": 0.8097, "nll_loss": 0.7445757389068604, "rewards/accuracies": 0.625, "rewards/chosen": -0.24096374213695526, "rewards/margins": 0.037077974528074265, "rewards/rejected": -0.2780417203903198, "step": 2298 }, { "epoch": 6.294318959616701, "grad_norm": 2.614076614379883, "learning_rate": 6.850684931506849e-07, "log_odds_chosen": 2.7059614658355713, "log_odds_ratio": -0.3316833972930908, "logits/chosen": 0.7256385087966919, "logits/rejected": 0.7099350690841675, "logps/chosen": -1.5634963512420654, "logps/rejected": -3.9339728355407715, "loss": 0.6884, "nll_loss": 0.6552190184593201, "rewards/accuracies": 0.875, "rewards/chosen": -0.15634962916374207, "rewards/margins": 0.23704767227172852, "rewards/rejected": -0.39339733123779297, "step": 2299 }, { "epoch": 6.297056810403833, "grad_norm": 2.690380334854126, "learning_rate": 6.84931506849315e-07, "log_odds_chosen": 1.4319961071014404, "log_odds_ratio": -0.4607674479484558, "logits/chosen": 0.6348631978034973, "logits/rejected": 0.601123571395874, "logps/chosen": -2.0041043758392334, "logps/rejected": -3.408642530441284, "loss": 0.7683, "nll_loss": 0.7222191691398621, "rewards/accuracies": 0.75, "rewards/chosen": -0.20041044056415558, "rewards/margins": 0.14045381546020508, "rewards/rejected": -0.34086427092552185, "step": 2300 }, { "epoch": 6.299794661190965, "grad_norm": 4.312866687774658, "learning_rate": 6.847945205479452e-07, "log_odds_chosen": 1.0270652770996094, "log_odds_ratio": -0.5615211725234985, "logits/chosen": 0.675508439540863, "logits/rejected": 0.6478985548019409, "logps/chosen": -1.985568642616272, "logps/rejected": -2.7640013694763184, "loss": 0.7328, "nll_loss": 0.6766375303268433, "rewards/accuracies": 0.875, "rewards/chosen": -0.19855687022209167, "rewards/margins": 0.07784327864646912, "rewards/rejected": -0.2764001488685608, "step": 2301 }, { "epoch": 6.3025325119780975, "grad_norm": 3.4951884746551514, "learning_rate": 6.846575342465753e-07, "log_odds_chosen": 2.0439980030059814, "log_odds_ratio": -0.6729533076286316, "logits/chosen": 0.799360990524292, "logits/rejected": 0.7914236783981323, "logps/chosen": -2.4836947917938232, "logps/rejected": -4.423443794250488, "loss": 0.7178, "nll_loss": 0.6504567861557007, "rewards/accuracies": 0.875, "rewards/chosen": -0.2483694851398468, "rewards/margins": 0.19397491216659546, "rewards/rejected": -0.44234439730644226, "step": 2302 }, { "epoch": 6.30527036276523, "grad_norm": 3.736119270324707, "learning_rate": 6.845205479452054e-07, "log_odds_chosen": 3.6566591262817383, "log_odds_ratio": -0.09958484768867493, "logits/chosen": 1.0073466300964355, "logits/rejected": 0.9243565797805786, "logps/chosen": -2.0605833530426025, "logps/rejected": -5.492927551269531, "loss": 0.727, "nll_loss": 0.7170240879058838, "rewards/accuracies": 1.0, "rewards/chosen": -0.2060583531856537, "rewards/margins": 0.34323444962501526, "rewards/rejected": -0.549292802810669, "step": 2303 }, { "epoch": 6.308008213552362, "grad_norm": 2.621047258377075, "learning_rate": 6.843835616438356e-07, "log_odds_chosen": 2.20104718208313, "log_odds_ratio": -0.1774653196334839, "logits/chosen": 0.9360541105270386, "logits/rejected": 0.9098536968231201, "logps/chosen": -1.6773113012313843, "logps/rejected": -3.688821315765381, "loss": 0.6299, "nll_loss": 0.6121447086334229, "rewards/accuracies": 1.0, "rewards/chosen": -0.1677311360836029, "rewards/margins": 0.2011510133743286, "rewards/rejected": -0.3688821494579315, "step": 2304 }, { "epoch": 6.310746064339494, "grad_norm": 4.141910076141357, "learning_rate": 6.842465753424658e-07, "log_odds_chosen": 2.800058603286743, "log_odds_ratio": -0.18331442773342133, "logits/chosen": 1.0643832683563232, "logits/rejected": 1.0739593505859375, "logps/chosen": -2.183291435241699, "logps/rejected": -4.8261332511901855, "loss": 0.7438, "nll_loss": 0.7254629135131836, "rewards/accuracies": 0.875, "rewards/chosen": -0.21832911670207977, "rewards/margins": 0.2642842233181, "rewards/rejected": -0.48261332511901855, "step": 2305 }, { "epoch": 6.313483915126626, "grad_norm": 2.450183391571045, "learning_rate": 6.841095890410958e-07, "log_odds_chosen": 2.3311607837677, "log_odds_ratio": -0.20340055227279663, "logits/chosen": 0.7088639736175537, "logits/rejected": 0.6405667066574097, "logps/chosen": -1.7946600914001465, "logps/rejected": -3.980292558670044, "loss": 0.738, "nll_loss": 0.7177044153213501, "rewards/accuracies": 1.0, "rewards/chosen": -0.17946600914001465, "rewards/margins": 0.2185632437467575, "rewards/rejected": -0.39802926778793335, "step": 2306 }, { "epoch": 6.316221765913758, "grad_norm": 3.1288790702819824, "learning_rate": 6.83972602739726e-07, "log_odds_chosen": 1.9830331802368164, "log_odds_ratio": -0.24629339575767517, "logits/chosen": 0.9703240394592285, "logits/rejected": 0.9610992670059204, "logps/chosen": -1.8145689964294434, "logps/rejected": -3.625454902648926, "loss": 0.6489, "nll_loss": 0.6242450475692749, "rewards/accuracies": 1.0, "rewards/chosen": -0.18145689368247986, "rewards/margins": 0.18108859658241272, "rewards/rejected": -0.3625454902648926, "step": 2307 }, { "epoch": 6.31895961670089, "grad_norm": 2.565990686416626, "learning_rate": 6.838356164383562e-07, "log_odds_chosen": 1.4406814575195312, "log_odds_ratio": -0.41655731201171875, "logits/chosen": 0.7968078851699829, "logits/rejected": 0.7832955718040466, "logps/chosen": -2.206742525100708, "logps/rejected": -3.5876283645629883, "loss": 0.7772, "nll_loss": 0.7355653047561646, "rewards/accuracies": 0.875, "rewards/chosen": -0.22067426145076752, "rewards/margins": 0.13808858394622803, "rewards/rejected": -0.35876283049583435, "step": 2308 }, { "epoch": 6.321697467488022, "grad_norm": 2.523695468902588, "learning_rate": 6.836986301369862e-07, "log_odds_chosen": 4.434142112731934, "log_odds_ratio": -0.133954256772995, "logits/chosen": 1.170245885848999, "logits/rejected": 1.2092063426971436, "logps/chosen": -3.074906349182129, "logps/rejected": -7.446093559265137, "loss": 0.6721, "nll_loss": 0.6587296724319458, "rewards/accuracies": 1.0, "rewards/chosen": -0.30749064683914185, "rewards/margins": 0.4371187090873718, "rewards/rejected": -0.7446093559265137, "step": 2309 }, { "epoch": 6.324435318275154, "grad_norm": 4.29640531539917, "learning_rate": 6.835616438356164e-07, "log_odds_chosen": 1.3095135688781738, "log_odds_ratio": -0.5306779742240906, "logits/chosen": 0.7470738887786865, "logits/rejected": 0.7548037767410278, "logps/chosen": -2.682849645614624, "logps/rejected": -3.840102434158325, "loss": 0.8233, "nll_loss": 0.7702608108520508, "rewards/accuracies": 0.75, "rewards/chosen": -0.26828497648239136, "rewards/margins": 0.11572527885437012, "rewards/rejected": -0.3840102553367615, "step": 2310 }, { "epoch": 6.327173169062286, "grad_norm": 2.513521194458008, "learning_rate": 6.834246575342465e-07, "log_odds_chosen": 3.4898338317871094, "log_odds_ratio": -0.2128521353006363, "logits/chosen": 0.9106273651123047, "logits/rejected": 0.922848105430603, "logps/chosen": -1.7687678337097168, "logps/rejected": -5.0955657958984375, "loss": 0.6432, "nll_loss": 0.6219273209571838, "rewards/accuracies": 0.875, "rewards/chosen": -0.17687679827213287, "rewards/margins": 0.332679808139801, "rewards/rejected": -0.5095566511154175, "step": 2311 }, { "epoch": 6.329911019849418, "grad_norm": 2.96567964553833, "learning_rate": 6.832876712328767e-07, "log_odds_chosen": 2.1750199794769287, "log_odds_ratio": -0.22754673659801483, "logits/chosen": 1.0086464881896973, "logits/rejected": 1.0387630462646484, "logps/chosen": -2.2226667404174805, "logps/rejected": -4.300551414489746, "loss": 0.671, "nll_loss": 0.6482669711112976, "rewards/accuracies": 1.0, "rewards/chosen": -0.22226664423942566, "rewards/margins": 0.20778851211071014, "rewards/rejected": -0.430055171251297, "step": 2312 }, { "epoch": 6.33264887063655, "grad_norm": 4.4774017333984375, "learning_rate": 6.831506849315068e-07, "log_odds_chosen": 2.8504397869110107, "log_odds_ratio": -0.30439215898513794, "logits/chosen": 1.1257736682891846, "logits/rejected": 1.1181268692016602, "logps/chosen": -2.5980916023254395, "logps/rejected": -5.376603126525879, "loss": 0.7292, "nll_loss": 0.6987842917442322, "rewards/accuracies": 0.75, "rewards/chosen": -0.2598091661930084, "rewards/margins": 0.2778511643409729, "rewards/rejected": -0.5376603007316589, "step": 2313 }, { "epoch": 6.335386721423682, "grad_norm": 5.680781364440918, "learning_rate": 6.830136986301369e-07, "log_odds_chosen": 1.9542369842529297, "log_odds_ratio": -0.7406448125839233, "logits/chosen": 0.773565948009491, "logits/rejected": 0.778747022151947, "logps/chosen": -2.579336643218994, "logps/rejected": -4.362490177154541, "loss": 0.8389, "nll_loss": 0.7648851871490479, "rewards/accuracies": 0.875, "rewards/chosen": -0.25793367624282837, "rewards/margins": 0.17831534147262573, "rewards/rejected": -0.4362490177154541, "step": 2314 }, { "epoch": 6.338124572210814, "grad_norm": 2.5719943046569824, "learning_rate": 6.828767123287671e-07, "log_odds_chosen": 2.8837320804595947, "log_odds_ratio": -0.25345683097839355, "logits/chosen": 0.827350378036499, "logits/rejected": 0.8282378911972046, "logps/chosen": -1.443009853363037, "logps/rejected": -4.122261047363281, "loss": 0.7069, "nll_loss": 0.6815210580825806, "rewards/accuracies": 1.0, "rewards/chosen": -0.14430098235607147, "rewards/margins": 0.26792511343955994, "rewards/rejected": -0.4122261106967926, "step": 2315 }, { "epoch": 6.3408624229979464, "grad_norm": 2.8870441913604736, "learning_rate": 6.827397260273972e-07, "log_odds_chosen": 2.276639461517334, "log_odds_ratio": -0.2685036361217499, "logits/chosen": 0.6426041126251221, "logits/rejected": 0.6107953786849976, "logps/chosen": -1.8196048736572266, "logps/rejected": -3.9818665981292725, "loss": 0.7902, "nll_loss": 0.7633640766143799, "rewards/accuracies": 0.875, "rewards/chosen": -0.18196050822734833, "rewards/margins": 0.21622616052627563, "rewards/rejected": -0.39818665385246277, "step": 2316 }, { "epoch": 6.3436002737850785, "grad_norm": 3.572234869003296, "learning_rate": 6.826027397260273e-07, "log_odds_chosen": 1.8988451957702637, "log_odds_ratio": -0.36147090792655945, "logits/chosen": 0.8942128419876099, "logits/rejected": 0.881771445274353, "logps/chosen": -2.4130570888519287, "logps/rejected": -4.225671768188477, "loss": 0.7138, "nll_loss": 0.6776673793792725, "rewards/accuracies": 0.75, "rewards/chosen": -0.24130569398403168, "rewards/margins": 0.18126150965690613, "rewards/rejected": -0.422567218542099, "step": 2317 }, { "epoch": 6.346338124572211, "grad_norm": 3.616342782974243, "learning_rate": 6.824657534246575e-07, "log_odds_chosen": 1.8817088603973389, "log_odds_ratio": -0.3506516218185425, "logits/chosen": 0.46194761991500854, "logits/rejected": 0.3820837438106537, "logps/chosen": -1.6041027307510376, "logps/rejected": -3.333399534225464, "loss": 0.765, "nll_loss": 0.7298979759216309, "rewards/accuracies": 0.875, "rewards/chosen": -0.16041027009487152, "rewards/margins": 0.17292967438697815, "rewards/rejected": -0.3333399295806885, "step": 2318 }, { "epoch": 6.349075975359343, "grad_norm": 2.719377279281616, "learning_rate": 6.823287671232877e-07, "log_odds_chosen": 2.074990749359131, "log_odds_ratio": -0.21463903784751892, "logits/chosen": 0.5916540026664734, "logits/rejected": 0.5103338956832886, "logps/chosen": -1.701084852218628, "logps/rejected": -3.576371192932129, "loss": 0.7497, "nll_loss": 0.7282090187072754, "rewards/accuracies": 1.0, "rewards/chosen": -0.17010849714279175, "rewards/margins": 0.18752862513065338, "rewards/rejected": -0.35763710737228394, "step": 2319 }, { "epoch": 6.351813826146475, "grad_norm": 2.486354351043701, "learning_rate": 6.821917808219177e-07, "log_odds_chosen": 2.1385371685028076, "log_odds_ratio": -0.248566672205925, "logits/chosen": 0.9640347957611084, "logits/rejected": 0.9303820133209229, "logps/chosen": -1.693061351776123, "logps/rejected": -3.6381301879882812, "loss": 0.7178, "nll_loss": 0.6929014325141907, "rewards/accuracies": 1.0, "rewards/chosen": -0.16930612921714783, "rewards/margins": 0.19450688362121582, "rewards/rejected": -0.36381301283836365, "step": 2320 }, { "epoch": 6.354551676933607, "grad_norm": 4.049327373504639, "learning_rate": 6.820547945205479e-07, "log_odds_chosen": 2.5521371364593506, "log_odds_ratio": -0.17387792468070984, "logits/chosen": 1.0391870737075806, "logits/rejected": 1.024925708770752, "logps/chosen": -2.601130485534668, "logps/rejected": -5.036584377288818, "loss": 0.781, "nll_loss": 0.7635759115219116, "rewards/accuracies": 1.0, "rewards/chosen": -0.26011306047439575, "rewards/margins": 0.24354536831378937, "rewards/rejected": -0.5036584138870239, "step": 2321 }, { "epoch": 6.357289527720739, "grad_norm": 2.575704574584961, "learning_rate": 6.819178082191781e-07, "log_odds_chosen": 1.900005578994751, "log_odds_ratio": -0.24276214838027954, "logits/chosen": 0.615936815738678, "logits/rejected": 0.5859289169311523, "logps/chosen": -1.5303821563720703, "logps/rejected": -3.2468435764312744, "loss": 0.7175, "nll_loss": 0.693178117275238, "rewards/accuracies": 1.0, "rewards/chosen": -0.15303820371627808, "rewards/margins": 0.1716461330652237, "rewards/rejected": -0.32468435168266296, "step": 2322 }, { "epoch": 6.360027378507871, "grad_norm": 5.162142276763916, "learning_rate": 6.817808219178081e-07, "log_odds_chosen": 0.34604647755622864, "log_odds_ratio": -1.089125633239746, "logits/chosen": 0.974429726600647, "logits/rejected": 0.9614585041999817, "logps/chosen": -3.0468010902404785, "logps/rejected": -3.425723075866699, "loss": 0.8931, "nll_loss": 0.7841418981552124, "rewards/accuracies": 0.5, "rewards/chosen": -0.30468010902404785, "rewards/margins": 0.0378921702504158, "rewards/rejected": -0.34257230162620544, "step": 2323 }, { "epoch": 6.362765229295004, "grad_norm": 4.146618843078613, "learning_rate": 6.816438356164383e-07, "log_odds_chosen": 1.500121831893921, "log_odds_ratio": -0.30764368176460266, "logits/chosen": 0.588134765625, "logits/rejected": 0.5491057634353638, "logps/chosen": -2.1550965309143066, "logps/rejected": -3.53458833694458, "loss": 0.7993, "nll_loss": 0.7685604691505432, "rewards/accuracies": 0.875, "rewards/chosen": -0.21550965309143066, "rewards/margins": 0.13794919848442078, "rewards/rejected": -0.35345885157585144, "step": 2324 }, { "epoch": 6.365503080082135, "grad_norm": 2.850781202316284, "learning_rate": 6.815068493150684e-07, "log_odds_chosen": 1.793773889541626, "log_odds_ratio": -0.2943788766860962, "logits/chosen": 0.6702914237976074, "logits/rejected": 0.6131792068481445, "logps/chosen": -1.4878175258636475, "logps/rejected": -3.1411709785461426, "loss": 0.6806, "nll_loss": 0.6511297225952148, "rewards/accuracies": 0.875, "rewards/chosen": -0.14878176152706146, "rewards/margins": 0.16533534228801727, "rewards/rejected": -0.31411710381507874, "step": 2325 }, { "epoch": 6.368240930869268, "grad_norm": 4.320094108581543, "learning_rate": 6.813698630136986e-07, "log_odds_chosen": 1.2953380346298218, "log_odds_ratio": -0.4572025537490845, "logits/chosen": 0.8395779132843018, "logits/rejected": 0.8521935343742371, "logps/chosen": -2.0482492446899414, "logps/rejected": -3.1836769580841064, "loss": 0.6907, "nll_loss": 0.6449708938598633, "rewards/accuracies": 0.875, "rewards/chosen": -0.20482493937015533, "rewards/margins": 0.11354275792837143, "rewards/rejected": -0.31836768984794617, "step": 2326 }, { "epoch": 6.3709787816564, "grad_norm": 3.087292194366455, "learning_rate": 6.812328767123287e-07, "log_odds_chosen": 2.494220733642578, "log_odds_ratio": -0.36581477522850037, "logits/chosen": 0.6332356333732605, "logits/rejected": 0.6404166221618652, "logps/chosen": -1.388641119003296, "logps/rejected": -3.6435492038726807, "loss": 0.6445, "nll_loss": 0.6079471111297607, "rewards/accuracies": 0.75, "rewards/chosen": -0.13886412978172302, "rewards/margins": 0.22549079358577728, "rewards/rejected": -0.3643549084663391, "step": 2327 }, { "epoch": 6.373716632443532, "grad_norm": 3.692298650741577, "learning_rate": 6.810958904109588e-07, "log_odds_chosen": 0.8911269903182983, "log_odds_ratio": -0.4567314684391022, "logits/chosen": 0.3886684477329254, "logits/rejected": 0.3822641968727112, "logps/chosen": -2.4152259826660156, "logps/rejected": -3.2166647911071777, "loss": 0.8723, "nll_loss": 0.8266105651855469, "rewards/accuracies": 0.875, "rewards/chosen": -0.24152261018753052, "rewards/margins": 0.08014386892318726, "rewards/rejected": -0.3216664791107178, "step": 2328 }, { "epoch": 6.376454483230664, "grad_norm": 2.787684679031372, "learning_rate": 6.80958904109589e-07, "log_odds_chosen": 0.8359689712524414, "log_odds_ratio": -0.4198688268661499, "logits/chosen": 0.6901678442955017, "logits/rejected": 0.7077772617340088, "logps/chosen": -2.008967399597168, "logps/rejected": -2.7489984035491943, "loss": 0.7685, "nll_loss": 0.7265485525131226, "rewards/accuracies": 0.75, "rewards/chosen": -0.2008967399597168, "rewards/margins": 0.07400311529636383, "rewards/rejected": -0.27489984035491943, "step": 2329 }, { "epoch": 6.379192334017796, "grad_norm": 2.9903268814086914, "learning_rate": 6.808219178082191e-07, "log_odds_chosen": 2.1481595039367676, "log_odds_ratio": -0.33265602588653564, "logits/chosen": 1.062825083732605, "logits/rejected": 1.043728232383728, "logps/chosen": -1.6730396747589111, "logps/rejected": -3.700227737426758, "loss": 0.6757, "nll_loss": 0.6424350738525391, "rewards/accuracies": 1.0, "rewards/chosen": -0.16730396449565887, "rewards/margins": 0.2027187943458557, "rewards/rejected": -0.3700227737426758, "step": 2330 }, { "epoch": 6.381930184804928, "grad_norm": 3.2825443744659424, "learning_rate": 6.806849315068492e-07, "log_odds_chosen": 2.6467058658599854, "log_odds_ratio": -0.5231412649154663, "logits/chosen": 0.7062901854515076, "logits/rejected": 0.6395981311798096, "logps/chosen": -1.971414566040039, "logps/rejected": -4.4950151443481445, "loss": 0.8031, "nll_loss": 0.7507448196411133, "rewards/accuracies": 0.875, "rewards/chosen": -0.19714143872261047, "rewards/margins": 0.2523600459098816, "rewards/rejected": -0.44950151443481445, "step": 2331 }, { "epoch": 6.38466803559206, "grad_norm": 2.679279327392578, "learning_rate": 6.805479452054794e-07, "log_odds_chosen": 1.7896177768707275, "log_odds_ratio": -0.360468327999115, "logits/chosen": 0.9733225703239441, "logits/rejected": 0.9724789261817932, "logps/chosen": -1.8185502290725708, "logps/rejected": -3.499289035797119, "loss": 0.7177, "nll_loss": 0.681626558303833, "rewards/accuracies": 0.75, "rewards/chosen": -0.18185502290725708, "rewards/margins": 0.1680738925933838, "rewards/rejected": -0.34992891550064087, "step": 2332 }, { "epoch": 6.3874058863791925, "grad_norm": 2.701931953430176, "learning_rate": 6.804109589041097e-07, "log_odds_chosen": 3.8083183765411377, "log_odds_ratio": -0.11807706207036972, "logits/chosen": 0.7799926400184631, "logits/rejected": 0.744960367679596, "logps/chosen": -1.2750437259674072, "logps/rejected": -4.728569030761719, "loss": 0.7329, "nll_loss": 0.7211220264434814, "rewards/accuracies": 1.0, "rewards/chosen": -0.127504363656044, "rewards/margins": 0.34535253047943115, "rewards/rejected": -0.47285687923431396, "step": 2333 }, { "epoch": 6.3901437371663246, "grad_norm": 2.7581589221954346, "learning_rate": 6.802739726027396e-07, "log_odds_chosen": 2.2235255241394043, "log_odds_ratio": -0.3023439049720764, "logits/chosen": 0.6893725991249084, "logits/rejected": 0.6125781536102295, "logps/chosen": -2.1072001457214355, "logps/rejected": -4.191712856292725, "loss": 0.7853, "nll_loss": 0.7550349235534668, "rewards/accuracies": 0.875, "rewards/chosen": -0.2107200026512146, "rewards/margins": 0.2084512710571289, "rewards/rejected": -0.4191712737083435, "step": 2334 }, { "epoch": 6.392881587953457, "grad_norm": 2.502786874771118, "learning_rate": 6.801369863013698e-07, "log_odds_chosen": 1.144851803779602, "log_odds_ratio": -0.3813973665237427, "logits/chosen": 0.915524423122406, "logits/rejected": 0.8669277429580688, "logps/chosen": -1.3340264558792114, "logps/rejected": -2.1899867057800293, "loss": 0.6896, "nll_loss": 0.651450514793396, "rewards/accuracies": 0.875, "rewards/chosen": -0.13340264558792114, "rewards/margins": 0.08559602499008179, "rewards/rejected": -0.21899867057800293, "step": 2335 }, { "epoch": 6.395619438740589, "grad_norm": 3.2184109687805176, "learning_rate": 6.800000000000001e-07, "log_odds_chosen": 1.182370901107788, "log_odds_ratio": -0.39291784167289734, "logits/chosen": 0.9306902885437012, "logits/rejected": 0.9207070469856262, "logps/chosen": -1.9591001272201538, "logps/rejected": -2.9794678688049316, "loss": 0.6302, "nll_loss": 0.5908918380737305, "rewards/accuracies": 0.875, "rewards/chosen": -0.1959100216627121, "rewards/margins": 0.10203677415847778, "rewards/rejected": -0.2979468107223511, "step": 2336 }, { "epoch": 6.398357289527721, "grad_norm": 2.8114142417907715, "learning_rate": 6.7986301369863e-07, "log_odds_chosen": 3.2889420986175537, "log_odds_ratio": -0.13441826403141022, "logits/chosen": 0.7257737517356873, "logits/rejected": 0.6717032790184021, "logps/chosen": -1.5203303098678589, "logps/rejected": -4.529356479644775, "loss": 0.6784, "nll_loss": 0.6649982929229736, "rewards/accuracies": 1.0, "rewards/chosen": -0.15203304588794708, "rewards/margins": 0.3009026050567627, "rewards/rejected": -0.4529356360435486, "step": 2337 }, { "epoch": 6.401095140314853, "grad_norm": 2.519365072250366, "learning_rate": 6.797260273972603e-07, "log_odds_chosen": 2.196362018585205, "log_odds_ratio": -0.1896059811115265, "logits/chosen": 0.9595463275909424, "logits/rejected": 0.9403812289237976, "logps/chosen": -1.6669725179672241, "logps/rejected": -3.6807353496551514, "loss": 0.655, "nll_loss": 0.6360303163528442, "rewards/accuracies": 1.0, "rewards/chosen": -0.16669723391532898, "rewards/margins": 0.201376274228096, "rewards/rejected": -0.3680735230445862, "step": 2338 }, { "epoch": 6.403832991101985, "grad_norm": 2.646428108215332, "learning_rate": 6.795890410958904e-07, "log_odds_chosen": 1.7135343551635742, "log_odds_ratio": -0.2678337097167969, "logits/chosen": 0.9721808433532715, "logits/rejected": 0.8968693614006042, "logps/chosen": -1.730069875717163, "logps/rejected": -3.27970290184021, "loss": 0.7116, "nll_loss": 0.6848328113555908, "rewards/accuracies": 1.0, "rewards/chosen": -0.17300698161125183, "rewards/margins": 0.15496335923671722, "rewards/rejected": -0.32797032594680786, "step": 2339 }, { "epoch": 6.406570841889117, "grad_norm": 2.8999581336975098, "learning_rate": 6.794520547945205e-07, "log_odds_chosen": 1.848111867904663, "log_odds_ratio": -0.3587135672569275, "logits/chosen": 0.9138290882110596, "logits/rejected": 0.872346043586731, "logps/chosen": -2.030529022216797, "logps/rejected": -3.797633647918701, "loss": 0.8309, "nll_loss": 0.7950370907783508, "rewards/accuracies": 0.875, "rewards/chosen": -0.20305289328098297, "rewards/margins": 0.17671047151088715, "rewards/rejected": -0.3797633647918701, "step": 2340 }, { "epoch": 6.409308692676249, "grad_norm": 2.73681378364563, "learning_rate": 6.793150684931507e-07, "log_odds_chosen": 2.89249849319458, "log_odds_ratio": -0.15333229303359985, "logits/chosen": 0.8427739143371582, "logits/rejected": 0.8197864890098572, "logps/chosen": -1.7627774477005005, "logps/rejected": -4.467282295227051, "loss": 0.6744, "nll_loss": 0.6590752005577087, "rewards/accuracies": 1.0, "rewards/chosen": -0.1762777417898178, "rewards/margins": 0.2704504728317261, "rewards/rejected": -0.4467282295227051, "step": 2341 }, { "epoch": 6.412046543463381, "grad_norm": 2.959645986557007, "learning_rate": 6.791780821917808e-07, "log_odds_chosen": 2.4667513370513916, "log_odds_ratio": -0.2499515414237976, "logits/chosen": 0.8570132255554199, "logits/rejected": 0.8670639991760254, "logps/chosen": -1.846915364265442, "logps/rejected": -4.159540176391602, "loss": 0.6836, "nll_loss": 0.6586253643035889, "rewards/accuracies": 0.875, "rewards/chosen": -0.18469154834747314, "rewards/margins": 0.2312624752521515, "rewards/rejected": -0.41595399379730225, "step": 2342 }, { "epoch": 6.414784394250513, "grad_norm": 2.3996031284332275, "learning_rate": 6.79041095890411e-07, "log_odds_chosen": 2.3327629566192627, "log_odds_ratio": -0.16970258951187134, "logits/chosen": 0.8644260168075562, "logits/rejected": 0.8766428828239441, "logps/chosen": -1.46994149684906, "logps/rejected": -3.5496888160705566, "loss": 0.6362, "nll_loss": 0.6192157864570618, "rewards/accuracies": 1.0, "rewards/chosen": -0.14699414372444153, "rewards/margins": 0.20797476172447205, "rewards/rejected": -0.3549689054489136, "step": 2343 }, { "epoch": 6.417522245037645, "grad_norm": 2.8540382385253906, "learning_rate": 6.789041095890411e-07, "log_odds_chosen": 1.8544679880142212, "log_odds_ratio": -0.2228703796863556, "logits/chosen": 0.4904581606388092, "logits/rejected": 0.41104111075401306, "logps/chosen": -1.90596604347229, "logps/rejected": -3.625523090362549, "loss": 0.7167, "nll_loss": 0.694377064704895, "rewards/accuracies": 1.0, "rewards/chosen": -0.19059662520885468, "rewards/margins": 0.17195568978786469, "rewards/rejected": -0.36255231499671936, "step": 2344 }, { "epoch": 6.420260095824777, "grad_norm": 2.9264256954193115, "learning_rate": 6.787671232876712e-07, "log_odds_chosen": 2.2565877437591553, "log_odds_ratio": -0.21971528232097626, "logits/chosen": 0.6444320678710938, "logits/rejected": 0.5784750580787659, "logps/chosen": -1.8903461694717407, "logps/rejected": -3.829599380493164, "loss": 0.7035, "nll_loss": 0.6814978718757629, "rewards/accuracies": 1.0, "rewards/chosen": -0.1890346258878708, "rewards/margins": 0.19392532110214233, "rewards/rejected": -0.3829599618911743, "step": 2345 }, { "epoch": 6.422997946611909, "grad_norm": 8.518451690673828, "learning_rate": 6.786301369863014e-07, "log_odds_chosen": 0.4288206100463867, "log_odds_ratio": -0.9238074421882629, "logits/chosen": 0.761177122592926, "logits/rejected": 0.7242653965950012, "logps/chosen": -2.2302024364471436, "logps/rejected": -2.5564115047454834, "loss": 0.7757, "nll_loss": 0.6833041310310364, "rewards/accuracies": 0.75, "rewards/chosen": -0.22302022576332092, "rewards/margins": 0.03262091055512428, "rewards/rejected": -0.2556411623954773, "step": 2346 }, { "epoch": 6.425735797399041, "grad_norm": 3.0901291370391846, "learning_rate": 6.784931506849315e-07, "log_odds_chosen": 2.6550636291503906, "log_odds_ratio": -0.20790144801139832, "logits/chosen": 0.8598112463951111, "logits/rejected": 0.8915337920188904, "logps/chosen": -2.053436517715454, "logps/rejected": -4.5941033363342285, "loss": 0.7271, "nll_loss": 0.7063297033309937, "rewards/accuracies": 1.0, "rewards/chosen": -0.20534366369247437, "rewards/margins": 0.25406667590141296, "rewards/rejected": -0.45941033959388733, "step": 2347 }, { "epoch": 6.4284736481861735, "grad_norm": 2.927110195159912, "learning_rate": 6.783561643835616e-07, "log_odds_chosen": 2.280545473098755, "log_odds_ratio": -0.2623579800128937, "logits/chosen": 1.0163016319274902, "logits/rejected": 1.0141551494598389, "logps/chosen": -1.951754093170166, "logps/rejected": -4.1325154304504395, "loss": 0.6739, "nll_loss": 0.6476401686668396, "rewards/accuracies": 0.875, "rewards/chosen": -0.1951754242181778, "rewards/margins": 0.21807612478733063, "rewards/rejected": -0.4132515490055084, "step": 2348 }, { "epoch": 6.431211498973306, "grad_norm": 4.873116970062256, "learning_rate": 6.782191780821918e-07, "log_odds_chosen": 3.579955577850342, "log_odds_ratio": -0.28353023529052734, "logits/chosen": 1.0039234161376953, "logits/rejected": 1.0381664037704468, "logps/chosen": -2.606781005859375, "logps/rejected": -6.0995192527771, "loss": 0.7385, "nll_loss": 0.7101113796234131, "rewards/accuracies": 0.875, "rewards/chosen": -0.26067811250686646, "rewards/margins": 0.3492738604545593, "rewards/rejected": -0.6099519729614258, "step": 2349 }, { "epoch": 6.433949349760438, "grad_norm": 4.307765007019043, "learning_rate": 6.78082191780822e-07, "log_odds_chosen": 1.7959668636322021, "log_odds_ratio": -0.6151034235954285, "logits/chosen": 0.6704184412956238, "logits/rejected": 0.6934452056884766, "logps/chosen": -2.3410804271698, "logps/rejected": -4.038161754608154, "loss": 0.8699, "nll_loss": 0.8084385395050049, "rewards/accuracies": 0.875, "rewards/chosen": -0.2341080605983734, "rewards/margins": 0.16970813274383545, "rewards/rejected": -0.4038161635398865, "step": 2350 }, { "epoch": 6.436687200547571, "grad_norm": 3.0891220569610596, "learning_rate": 6.77945205479452e-07, "log_odds_chosen": 1.9965797662734985, "log_odds_ratio": -0.3056629002094269, "logits/chosen": 0.8166289329528809, "logits/rejected": 0.8317553997039795, "logps/chosen": -2.248961925506592, "logps/rejected": -4.179890155792236, "loss": 0.808, "nll_loss": 0.7774831056594849, "rewards/accuracies": 0.875, "rewards/chosen": -0.22489619255065918, "rewards/margins": 0.19309279322624207, "rewards/rejected": -0.41798901557922363, "step": 2351 }, { "epoch": 6.439425051334703, "grad_norm": 3.007981061935425, "learning_rate": 6.778082191780822e-07, "log_odds_chosen": 2.323955535888672, "log_odds_ratio": -0.23363107442855835, "logits/chosen": 0.8390114307403564, "logits/rejected": 0.8315629363059998, "logps/chosen": -2.1423234939575195, "logps/rejected": -4.349091529846191, "loss": 0.7393, "nll_loss": 0.7159165740013123, "rewards/accuracies": 1.0, "rewards/chosen": -0.21423234045505524, "rewards/margins": 0.22067680954933167, "rewards/rejected": -0.4349091351032257, "step": 2352 }, { "epoch": 6.442162902121835, "grad_norm": 2.879112720489502, "learning_rate": 6.776712328767124e-07, "log_odds_chosen": 2.3419036865234375, "log_odds_ratio": -0.24020667374134064, "logits/chosen": 0.9351500868797302, "logits/rejected": 0.909055769443512, "logps/chosen": -1.5756123065948486, "logps/rejected": -3.6292717456817627, "loss": 0.6786, "nll_loss": 0.6545988917350769, "rewards/accuracies": 0.875, "rewards/chosen": -0.15756124258041382, "rewards/margins": 0.20536594092845917, "rewards/rejected": -0.3629271686077118, "step": 2353 }, { "epoch": 6.444900752908967, "grad_norm": 2.811424732208252, "learning_rate": 6.775342465753424e-07, "log_odds_chosen": 2.4073588848114014, "log_odds_ratio": -0.24922829866409302, "logits/chosen": 0.7396307587623596, "logits/rejected": 0.7025895118713379, "logps/chosen": -1.9880515336990356, "logps/rejected": -4.1967267990112305, "loss": 0.7277, "nll_loss": 0.7028067111968994, "rewards/accuracies": 0.875, "rewards/chosen": -0.19880515336990356, "rewards/margins": 0.22086752951145172, "rewards/rejected": -0.4196726977825165, "step": 2354 }, { "epoch": 6.447638603696099, "grad_norm": 3.1689653396606445, "learning_rate": 6.773972602739726e-07, "log_odds_chosen": 1.3118574619293213, "log_odds_ratio": -0.3346182703971863, "logits/chosen": 0.7950366139411926, "logits/rejected": 0.7760432958602905, "logps/chosen": -1.754094123840332, "logps/rejected": -2.8991189002990723, "loss": 0.789, "nll_loss": 0.755547046661377, "rewards/accuracies": 0.875, "rewards/chosen": -0.1754094362258911, "rewards/margins": 0.11450247466564178, "rewards/rejected": -0.2899118959903717, "step": 2355 }, { "epoch": 6.450376454483231, "grad_norm": 3.2343993186950684, "learning_rate": 6.772602739726027e-07, "log_odds_chosen": 1.3708032369613647, "log_odds_ratio": -0.43463054299354553, "logits/chosen": 0.8165902495384216, "logits/rejected": 0.8212199807167053, "logps/chosen": -1.5365016460418701, "logps/rejected": -2.7926363945007324, "loss": 0.7568, "nll_loss": 0.7133449912071228, "rewards/accuracies": 0.75, "rewards/chosen": -0.153650164604187, "rewards/margins": 0.12561346590518951, "rewards/rejected": -0.27926361560821533, "step": 2356 }, { "epoch": 6.453114305270363, "grad_norm": 6.406494140625, "learning_rate": 6.771232876712329e-07, "log_odds_chosen": 1.4694621562957764, "log_odds_ratio": -0.6476963758468628, "logits/chosen": 0.7465289831161499, "logits/rejected": 0.6972489953041077, "logps/chosen": -2.6201822757720947, "logps/rejected": -3.9737629890441895, "loss": 0.8196, "nll_loss": 0.7548273205757141, "rewards/accuracies": 0.75, "rewards/chosen": -0.26201820373535156, "rewards/margins": 0.135358065366745, "rewards/rejected": -0.39737629890441895, "step": 2357 }, { "epoch": 6.455852156057495, "grad_norm": 2.351593494415283, "learning_rate": 6.76986301369863e-07, "log_odds_chosen": 4.0044660568237305, "log_odds_ratio": -0.032097846269607544, "logits/chosen": 0.8421136140823364, "logits/rejected": 0.7873022556304932, "logps/chosen": -1.946550726890564, "logps/rejected": -5.754901885986328, "loss": 0.6918, "nll_loss": 0.6886226534843445, "rewards/accuracies": 1.0, "rewards/chosen": -0.19465507566928864, "rewards/margins": 0.3808351159095764, "rewards/rejected": -0.5754901766777039, "step": 2358 }, { "epoch": 6.458590006844627, "grad_norm": 3.3612077236175537, "learning_rate": 6.768493150684931e-07, "log_odds_chosen": 1.6431691646575928, "log_odds_ratio": -0.4506865441799164, "logits/chosen": 0.7413865327835083, "logits/rejected": 0.7393252849578857, "logps/chosen": -1.8833056688308716, "logps/rejected": -3.480778455734253, "loss": 0.7113, "nll_loss": 0.6662372350692749, "rewards/accuracies": 0.75, "rewards/chosen": -0.18833056092262268, "rewards/margins": 0.15974728763103485, "rewards/rejected": -0.3480778634548187, "step": 2359 }, { "epoch": 6.461327857631759, "grad_norm": 2.5342636108398438, "learning_rate": 6.767123287671233e-07, "log_odds_chosen": 2.2860240936279297, "log_odds_ratio": -0.25858741998672485, "logits/chosen": 0.9226419925689697, "logits/rejected": 0.8831920027732849, "logps/chosen": -2.1290154457092285, "logps/rejected": -4.312133312225342, "loss": 0.6711, "nll_loss": 0.6452836990356445, "rewards/accuracies": 1.0, "rewards/chosen": -0.21290156245231628, "rewards/margins": 0.21831178665161133, "rewards/rejected": -0.4312133491039276, "step": 2360 }, { "epoch": 6.464065708418891, "grad_norm": 3.347273588180542, "learning_rate": 6.765753424657534e-07, "log_odds_chosen": 2.357722282409668, "log_odds_ratio": -0.25055888295173645, "logits/chosen": 0.9660165309906006, "logits/rejected": 1.004417896270752, "logps/chosen": -2.977478504180908, "logps/rejected": -5.266910076141357, "loss": 0.719, "nll_loss": 0.6939870119094849, "rewards/accuracies": 0.875, "rewards/chosen": -0.2977478504180908, "rewards/margins": 0.22894316911697388, "rewards/rejected": -0.5266910195350647, "step": 2361 }, { "epoch": 6.466803559206023, "grad_norm": 2.370790481567383, "learning_rate": 6.764383561643835e-07, "log_odds_chosen": 3.190882921218872, "log_odds_ratio": -0.1741236448287964, "logits/chosen": 0.7905693650245667, "logits/rejected": 0.7501664161682129, "logps/chosen": -1.3300927877426147, "logps/rejected": -4.228713035583496, "loss": 0.6894, "nll_loss": 0.6719700694084167, "rewards/accuracies": 0.875, "rewards/chosen": -0.13300926983356476, "rewards/margins": 0.2898620665073395, "rewards/rejected": -0.42287135124206543, "step": 2362 }, { "epoch": 6.469541409993155, "grad_norm": 2.906031370162964, "learning_rate": 6.763013698630137e-07, "log_odds_chosen": 1.9838943481445312, "log_odds_ratio": -0.29067015647888184, "logits/chosen": 0.7793301343917847, "logits/rejected": 0.7053311467170715, "logps/chosen": -1.8369512557983398, "logps/rejected": -3.622769355773926, "loss": 0.7105, "nll_loss": 0.6814499497413635, "rewards/accuracies": 0.875, "rewards/chosen": -0.18369512259960175, "rewards/margins": 0.17858180403709412, "rewards/rejected": -0.36227691173553467, "step": 2363 }, { "epoch": 6.4722792607802875, "grad_norm": 3.3143270015716553, "learning_rate": 6.761643835616439e-07, "log_odds_chosen": 2.2712643146514893, "log_odds_ratio": -0.20073941349983215, "logits/chosen": 0.724541187286377, "logits/rejected": 0.6905710697174072, "logps/chosen": -1.6791331768035889, "logps/rejected": -3.773648262023926, "loss": 0.7628, "nll_loss": 0.7427729964256287, "rewards/accuracies": 1.0, "rewards/chosen": -0.1679133176803589, "rewards/margins": 0.20945151150226593, "rewards/rejected": -0.37736478447914124, "step": 2364 }, { "epoch": 6.4750171115674195, "grad_norm": 3.8213307857513428, "learning_rate": 6.760273972602739e-07, "log_odds_chosen": 0.47732484340667725, "log_odds_ratio": -0.7102292776107788, "logits/chosen": 0.6535650491714478, "logits/rejected": 0.6015732884407043, "logps/chosen": -2.283797264099121, "logps/rejected": -2.7404580116271973, "loss": 0.7344, "nll_loss": 0.6633551716804504, "rewards/accuracies": 0.625, "rewards/chosen": -0.2283797264099121, "rewards/margins": 0.04566608741879463, "rewards/rejected": -0.27404582500457764, "step": 2365 }, { "epoch": 6.477754962354552, "grad_norm": 3.0147078037261963, "learning_rate": 6.758904109589041e-07, "log_odds_chosen": 2.039641857147217, "log_odds_ratio": -0.31105712056159973, "logits/chosen": 0.6651233434677124, "logits/rejected": 0.6327387690544128, "logps/chosen": -1.8973617553710938, "logps/rejected": -3.830331325531006, "loss": 0.7559, "nll_loss": 0.7247927188873291, "rewards/accuracies": 0.875, "rewards/chosen": -0.18973617255687714, "rewards/margins": 0.19329698383808136, "rewards/rejected": -0.3830331861972809, "step": 2366 }, { "epoch": 6.480492813141684, "grad_norm": 4.336022853851318, "learning_rate": 6.757534246575343e-07, "log_odds_chosen": 2.347748041152954, "log_odds_ratio": -0.5172489881515503, "logits/chosen": 0.6388921737670898, "logits/rejected": 0.6655924916267395, "logps/chosen": -2.7810144424438477, "logps/rejected": -4.993843078613281, "loss": 0.8296, "nll_loss": 0.7778877019882202, "rewards/accuracies": 0.75, "rewards/chosen": -0.27810144424438477, "rewards/margins": 0.22128286957740784, "rewards/rejected": -0.4993843138217926, "step": 2367 }, { "epoch": 6.483230663928816, "grad_norm": 5.524272441864014, "learning_rate": 6.756164383561643e-07, "log_odds_chosen": 0.7212350964546204, "log_odds_ratio": -0.6625586748123169, "logits/chosen": 0.7818963527679443, "logits/rejected": 0.7801859378814697, "logps/chosen": -2.851215362548828, "logps/rejected": -3.5072360038757324, "loss": 0.8772, "nll_loss": 0.8109809160232544, "rewards/accuracies": 0.75, "rewards/chosen": -0.28512150049209595, "rewards/margins": 0.06560205668210983, "rewards/rejected": -0.35072359442710876, "step": 2368 }, { "epoch": 6.485968514715948, "grad_norm": 2.539386034011841, "learning_rate": 6.754794520547945e-07, "log_odds_chosen": 1.6142076253890991, "log_odds_ratio": -0.3194712996482849, "logits/chosen": 0.9732389450073242, "logits/rejected": 0.9099653363227844, "logps/chosen": -1.590505599975586, "logps/rejected": -3.063288927078247, "loss": 0.7206, "nll_loss": 0.6886136531829834, "rewards/accuracies": 0.875, "rewards/chosen": -0.1590505689382553, "rewards/margins": 0.1472783088684082, "rewards/rejected": -0.3063288927078247, "step": 2369 }, { "epoch": 6.48870636550308, "grad_norm": 3.5751633644104004, "learning_rate": 6.753424657534246e-07, "log_odds_chosen": 2.6425554752349854, "log_odds_ratio": -0.3423382639884949, "logits/chosen": 1.0457831621170044, "logits/rejected": 1.0934102535247803, "logps/chosen": -2.2218031883239746, "logps/rejected": -4.743828773498535, "loss": 0.7328, "nll_loss": 0.6985549330711365, "rewards/accuracies": 0.875, "rewards/chosen": -0.2221803069114685, "rewards/margins": 0.252202570438385, "rewards/rejected": -0.4743828773498535, "step": 2370 }, { "epoch": 6.491444216290212, "grad_norm": 2.7315306663513184, "learning_rate": 6.752054794520548e-07, "log_odds_chosen": 1.8683379888534546, "log_odds_ratio": -0.3135564923286438, "logits/chosen": 1.047223687171936, "logits/rejected": 1.0539164543151855, "logps/chosen": -1.9204516410827637, "logps/rejected": -3.65541934967041, "loss": 0.673, "nll_loss": 0.6416351795196533, "rewards/accuracies": 0.875, "rewards/chosen": -0.1920451670885086, "rewards/margins": 0.1734967678785324, "rewards/rejected": -0.365541934967041, "step": 2371 }, { "epoch": 6.494182067077344, "grad_norm": 2.8511760234832764, "learning_rate": 6.750684931506849e-07, "log_odds_chosen": 1.1166884899139404, "log_odds_ratio": -0.32988616824150085, "logits/chosen": 0.7110070586204529, "logits/rejected": 0.6728066205978394, "logps/chosen": -1.3686940670013428, "logps/rejected": -2.2734360694885254, "loss": 0.6735, "nll_loss": 0.6405526995658875, "rewards/accuracies": 1.0, "rewards/chosen": -0.136869415640831, "rewards/margins": 0.0904742032289505, "rewards/rejected": -0.2273436188697815, "step": 2372 }, { "epoch": 6.496919917864476, "grad_norm": 3.1501474380493164, "learning_rate": 6.74931506849315e-07, "log_odds_chosen": 3.203976631164551, "log_odds_ratio": -0.1555468589067459, "logits/chosen": 0.8827877044677734, "logits/rejected": 0.8797421455383301, "logps/chosen": -2.325685977935791, "logps/rejected": -5.392836093902588, "loss": 0.6912, "nll_loss": 0.6756713390350342, "rewards/accuracies": 1.0, "rewards/chosen": -0.23256860673427582, "rewards/margins": 0.30671507120132446, "rewards/rejected": -0.5392836332321167, "step": 2373 }, { "epoch": 6.499657768651608, "grad_norm": 3.8683576583862305, "learning_rate": 6.747945205479452e-07, "log_odds_chosen": 0.8810856342315674, "log_odds_ratio": -0.506586492061615, "logits/chosen": 0.7130168080329895, "logits/rejected": 0.687155544757843, "logps/chosen": -1.8372249603271484, "logps/rejected": -2.622633695602417, "loss": 0.8089, "nll_loss": 0.7582084536552429, "rewards/accuracies": 0.75, "rewards/chosen": -0.18372251093387604, "rewards/margins": 0.0785408765077591, "rewards/rejected": -0.26226338744163513, "step": 2374 }, { "epoch": 6.50239561943874, "grad_norm": 2.7532520294189453, "learning_rate": 6.746575342465753e-07, "log_odds_chosen": 3.7423698902130127, "log_odds_ratio": -0.13815432786941528, "logits/chosen": 1.2216391563415527, "logits/rejected": 1.2440359592437744, "logps/chosen": -2.7378439903259277, "logps/rejected": -6.3476433753967285, "loss": 0.6272, "nll_loss": 0.6134011745452881, "rewards/accuracies": 1.0, "rewards/chosen": -0.2737843990325928, "rewards/margins": 0.36097991466522217, "rewards/rejected": -0.6347643136978149, "step": 2375 }, { "epoch": 6.505133470225873, "grad_norm": 7.019207954406738, "learning_rate": 6.745205479452054e-07, "log_odds_chosen": 0.6006907820701599, "log_odds_ratio": -0.6036322712898254, "logits/chosen": 0.8458437919616699, "logits/rejected": 0.8168321251869202, "logps/chosen": -1.6293282508850098, "logps/rejected": -2.0447916984558105, "loss": 0.6951, "nll_loss": 0.6347858309745789, "rewards/accuracies": 0.75, "rewards/chosen": -0.16293282806873322, "rewards/margins": 0.041546352207660675, "rewards/rejected": -0.2044791877269745, "step": 2376 }, { "epoch": 6.507871321013004, "grad_norm": 3.5887811183929443, "learning_rate": 6.743835616438356e-07, "log_odds_chosen": 2.5933384895324707, "log_odds_ratio": -0.2095080316066742, "logits/chosen": 0.7899689078330994, "logits/rejected": 0.7814540266990662, "logps/chosen": -2.3632972240448, "logps/rejected": -4.8508124351501465, "loss": 0.9047, "nll_loss": 0.8837518095970154, "rewards/accuracies": 0.875, "rewards/chosen": -0.23632973432540894, "rewards/margins": 0.24875153601169586, "rewards/rejected": -0.4850812554359436, "step": 2377 }, { "epoch": 6.510609171800137, "grad_norm": 2.4737460613250732, "learning_rate": 6.742465753424658e-07, "log_odds_chosen": 1.8172249794006348, "log_odds_ratio": -0.3106350004673004, "logits/chosen": 0.8110912442207336, "logits/rejected": 0.81200110912323, "logps/chosen": -1.9466181993484497, "logps/rejected": -3.6561479568481445, "loss": 0.705, "nll_loss": 0.6739242672920227, "rewards/accuracies": 1.0, "rewards/chosen": -0.19466182589530945, "rewards/margins": 0.1709529608488083, "rewards/rejected": -0.36561480164527893, "step": 2378 }, { "epoch": 6.5133470225872685, "grad_norm": 2.4609289169311523, "learning_rate": 6.741095890410958e-07, "log_odds_chosen": 3.283475637435913, "log_odds_ratio": -0.23770058155059814, "logits/chosen": 1.0963081121444702, "logits/rejected": 1.1041960716247559, "logps/chosen": -1.7302014827728271, "logps/rejected": -4.879510879516602, "loss": 0.6166, "nll_loss": 0.5927872657775879, "rewards/accuracies": 0.875, "rewards/chosen": -0.1730201542377472, "rewards/margins": 0.3149309456348419, "rewards/rejected": -0.4879510700702667, "step": 2379 }, { "epoch": 6.5160848733744015, "grad_norm": 3.8285791873931885, "learning_rate": 6.73972602739726e-07, "log_odds_chosen": 0.8338668346405029, "log_odds_ratio": -0.5909060835838318, "logits/chosen": 0.6794264912605286, "logits/rejected": 0.6743766069412231, "logps/chosen": -2.6416051387786865, "logps/rejected": -3.3433666229248047, "loss": 0.7986, "nll_loss": 0.7394770383834839, "rewards/accuracies": 0.75, "rewards/chosen": -0.26416051387786865, "rewards/margins": 0.0701761543750763, "rewards/rejected": -0.33433669805526733, "step": 2380 }, { "epoch": 6.5188227241615335, "grad_norm": 3.1409854888916016, "learning_rate": 6.738356164383562e-07, "log_odds_chosen": 2.437640428543091, "log_odds_ratio": -0.26667845249176025, "logits/chosen": 0.9530853033065796, "logits/rejected": 0.951790452003479, "logps/chosen": -2.836071014404297, "logps/rejected": -5.101939678192139, "loss": 0.7014, "nll_loss": 0.6747673749923706, "rewards/accuracies": 0.875, "rewards/chosen": -0.2836071252822876, "rewards/margins": 0.22658684849739075, "rewards/rejected": -0.5101940035820007, "step": 2381 }, { "epoch": 6.521560574948666, "grad_norm": 2.9771499633789062, "learning_rate": 6.736986301369862e-07, "log_odds_chosen": 1.822608470916748, "log_odds_ratio": -0.26597604155540466, "logits/chosen": 0.8049546480178833, "logits/rejected": 0.7473430633544922, "logps/chosen": -1.914550542831421, "logps/rejected": -3.569045305252075, "loss": 0.7538, "nll_loss": 0.7272169589996338, "rewards/accuracies": 1.0, "rewards/chosen": -0.19145505130290985, "rewards/margins": 0.16544948518276215, "rewards/rejected": -0.3569045066833496, "step": 2382 }, { "epoch": 6.524298425735798, "grad_norm": 3.2922542095184326, "learning_rate": 6.735616438356164e-07, "log_odds_chosen": 1.1844749450683594, "log_odds_ratio": -0.6387260556221008, "logits/chosen": 1.0995534658432007, "logits/rejected": 1.1387412548065186, "logps/chosen": -2.613107681274414, "logps/rejected": -3.7429468631744385, "loss": 0.7894, "nll_loss": 0.7255147695541382, "rewards/accuracies": 0.75, "rewards/chosen": -0.26131075620651245, "rewards/margins": 0.11298392713069916, "rewards/rejected": -0.3742946684360504, "step": 2383 }, { "epoch": 6.52703627652293, "grad_norm": 2.9993484020233154, "learning_rate": 6.734246575342466e-07, "log_odds_chosen": 1.8336446285247803, "log_odds_ratio": -0.34382712841033936, "logits/chosen": 0.6086309552192688, "logits/rejected": 0.492022305727005, "logps/chosen": -1.4808529615402222, "logps/rejected": -3.155926465988159, "loss": 0.7726, "nll_loss": 0.7382165193557739, "rewards/accuracies": 0.875, "rewards/chosen": -0.14808529615402222, "rewards/margins": 0.1675073504447937, "rewards/rejected": -0.3155926465988159, "step": 2384 }, { "epoch": 6.529774127310062, "grad_norm": 3.001124143600464, "learning_rate": 6.732876712328767e-07, "log_odds_chosen": 2.4230542182922363, "log_odds_ratio": -0.23296618461608887, "logits/chosen": 0.7403594255447388, "logits/rejected": 0.6528899669647217, "logps/chosen": -1.4349700212478638, "logps/rejected": -3.6113033294677734, "loss": 0.7252, "nll_loss": 0.7018669247627258, "rewards/accuracies": 1.0, "rewards/chosen": -0.14349700510501862, "rewards/margins": 0.21763333678245544, "rewards/rejected": -0.36113035678863525, "step": 2385 }, { "epoch": 6.532511978097194, "grad_norm": 4.311478137969971, "learning_rate": 6.731506849315068e-07, "log_odds_chosen": 2.014728546142578, "log_odds_ratio": -0.35490190982818604, "logits/chosen": 0.7438343167304993, "logits/rejected": 0.74139404296875, "logps/chosen": -1.8851640224456787, "logps/rejected": -3.7173588275909424, "loss": 0.715, "nll_loss": 0.6795463562011719, "rewards/accuracies": 0.875, "rewards/chosen": -0.18851640820503235, "rewards/margins": 0.18321947753429413, "rewards/rejected": -0.37173590064048767, "step": 2386 }, { "epoch": 6.535249828884326, "grad_norm": 3.1082141399383545, "learning_rate": 6.730136986301369e-07, "log_odds_chosen": 1.742188572883606, "log_odds_ratio": -0.2829478681087494, "logits/chosen": 0.8024820685386658, "logits/rejected": 0.8134686946868896, "logps/chosen": -1.559929370880127, "logps/rejected": -3.145554304122925, "loss": 0.6885, "nll_loss": 0.6602377891540527, "rewards/accuracies": 1.0, "rewards/chosen": -0.15599294006824493, "rewards/margins": 0.15856248140335083, "rewards/rejected": -0.31455543637275696, "step": 2387 }, { "epoch": 6.537987679671458, "grad_norm": 2.634634494781494, "learning_rate": 6.728767123287671e-07, "log_odds_chosen": 2.76705265045166, "log_odds_ratio": -0.21302708983421326, "logits/chosen": 0.9340337514877319, "logits/rejected": 0.9308737516403198, "logps/chosen": -1.690476655960083, "logps/rejected": -4.1980438232421875, "loss": 0.7506, "nll_loss": 0.7292639017105103, "rewards/accuracies": 1.0, "rewards/chosen": -0.16904765367507935, "rewards/margins": 0.25075674057006836, "rewards/rejected": -0.4198043942451477, "step": 2388 }, { "epoch": 6.54072553045859, "grad_norm": 2.4811832904815674, "learning_rate": 6.727397260273972e-07, "log_odds_chosen": 3.945788860321045, "log_odds_ratio": -0.10982154309749603, "logits/chosen": 1.221152901649475, "logits/rejected": 1.2725739479064941, "logps/chosen": -1.6498386859893799, "logps/rejected": -5.4012603759765625, "loss": 0.5806, "nll_loss": 0.5695783495903015, "rewards/accuracies": 1.0, "rewards/chosen": -0.16498388350009918, "rewards/margins": 0.37514209747314453, "rewards/rejected": -0.5401260256767273, "step": 2389 }, { "epoch": 6.543463381245722, "grad_norm": 2.4969801902770996, "learning_rate": 6.726027397260273e-07, "log_odds_chosen": 3.3464856147766113, "log_odds_ratio": -0.09787844121456146, "logits/chosen": 1.1007652282714844, "logits/rejected": 1.085559606552124, "logps/chosen": -2.552992105484009, "logps/rejected": -5.7862348556518555, "loss": 0.6553, "nll_loss": 0.6455013751983643, "rewards/accuracies": 1.0, "rewards/chosen": -0.2552992105484009, "rewards/margins": 0.3233242630958557, "rewards/rejected": -0.5786234736442566, "step": 2390 }, { "epoch": 6.546201232032854, "grad_norm": 3.278707981109619, "learning_rate": 6.724657534246575e-07, "log_odds_chosen": 1.701648235321045, "log_odds_ratio": -0.26879066228866577, "logits/chosen": 1.0136091709136963, "logits/rejected": 1.0196037292480469, "logps/chosen": -2.307154893875122, "logps/rejected": -3.899756908416748, "loss": 0.6916, "nll_loss": 0.6647471785545349, "rewards/accuracies": 0.875, "rewards/chosen": -0.23071549832820892, "rewards/margins": 0.15926019847393036, "rewards/rejected": -0.3899756669998169, "step": 2391 }, { "epoch": 6.548939082819986, "grad_norm": 2.3851277828216553, "learning_rate": 6.723287671232877e-07, "log_odds_chosen": 1.5918629169464111, "log_odds_ratio": -0.30618351697921753, "logits/chosen": 0.7432675361633301, "logits/rejected": 0.7190586924552917, "logps/chosen": -2.1414175033569336, "logps/rejected": -3.6257219314575195, "loss": 0.7398, "nll_loss": 0.7091706395149231, "rewards/accuracies": 0.75, "rewards/chosen": -0.21414175629615784, "rewards/margins": 0.1484304666519165, "rewards/rejected": -0.36257222294807434, "step": 2392 }, { "epoch": 6.551676933607118, "grad_norm": 8.33093547821045, "learning_rate": 6.721917808219177e-07, "log_odds_chosen": 1.67936110496521, "log_odds_ratio": -0.8366532921791077, "logits/chosen": 0.8188250064849854, "logits/rejected": 0.7997158765792847, "logps/chosen": -2.9333715438842773, "logps/rejected": -4.555755615234375, "loss": 0.8336, "nll_loss": 0.7498948574066162, "rewards/accuracies": 0.75, "rewards/chosen": -0.2933371663093567, "rewards/margins": 0.16223837435245514, "rewards/rejected": -0.45557552576065063, "step": 2393 }, { "epoch": 6.55441478439425, "grad_norm": 5.733500003814697, "learning_rate": 6.720547945205479e-07, "log_odds_chosen": 2.65085506439209, "log_odds_ratio": -0.5009579658508301, "logits/chosen": 0.8204348087310791, "logits/rejected": 0.7969400882720947, "logps/chosen": -2.67561674118042, "logps/rejected": -5.152432918548584, "loss": 0.6912, "nll_loss": 0.6411124467849731, "rewards/accuracies": 0.875, "rewards/chosen": -0.267561674118042, "rewards/margins": 0.2476816028356552, "rewards/rejected": -0.5152432918548584, "step": 2394 }, { "epoch": 6.5571526351813825, "grad_norm": 2.571439027786255, "learning_rate": 6.719178082191781e-07, "log_odds_chosen": 1.9145479202270508, "log_odds_ratio": -0.22821906208992004, "logits/chosen": 0.5482960343360901, "logits/rejected": 0.5168849229812622, "logps/chosen": -1.405822515487671, "logps/rejected": -3.0786609649658203, "loss": 0.6758, "nll_loss": 0.6530258655548096, "rewards/accuracies": 1.0, "rewards/chosen": -0.14058223366737366, "rewards/margins": 0.16728386282920837, "rewards/rejected": -0.30786609649658203, "step": 2395 }, { "epoch": 6.5598904859685145, "grad_norm": 2.7002673149108887, "learning_rate": 6.717808219178081e-07, "log_odds_chosen": 1.7006783485412598, "log_odds_ratio": -0.2030697464942932, "logits/chosen": 0.6421283483505249, "logits/rejected": 0.6157118082046509, "logps/chosen": -1.3908649682998657, "logps/rejected": -2.858372688293457, "loss": 0.7739, "nll_loss": 0.753565788269043, "rewards/accuracies": 1.0, "rewards/chosen": -0.1390864998102188, "rewards/margins": 0.146750807762146, "rewards/rejected": -0.2858372926712036, "step": 2396 }, { "epoch": 6.562628336755647, "grad_norm": 3.879056692123413, "learning_rate": 6.716438356164383e-07, "log_odds_chosen": 1.4274719953536987, "log_odds_ratio": -0.4120180904865265, "logits/chosen": 0.6270418167114258, "logits/rejected": 0.572323739528656, "logps/chosen": -1.9098083972930908, "logps/rejected": -3.1785688400268555, "loss": 0.7668, "nll_loss": 0.7255949378013611, "rewards/accuracies": 0.75, "rewards/chosen": -0.19098082184791565, "rewards/margins": 0.1268760859966278, "rewards/rejected": -0.31785690784454346, "step": 2397 }, { "epoch": 6.565366187542779, "grad_norm": 2.826552629470825, "learning_rate": 6.715068493150685e-07, "log_odds_chosen": 1.6884100437164307, "log_odds_ratio": -0.2477278858423233, "logits/chosen": 0.5271326303482056, "logits/rejected": 0.4468952417373657, "logps/chosen": -1.6891953945159912, "logps/rejected": -3.2125461101531982, "loss": 0.7255, "nll_loss": 0.7007125020027161, "rewards/accuracies": 0.875, "rewards/chosen": -0.16891954839229584, "rewards/margins": 0.152335062623024, "rewards/rejected": -0.3212546110153198, "step": 2398 }, { "epoch": 6.568104038329911, "grad_norm": 4.22003698348999, "learning_rate": 6.713698630136986e-07, "log_odds_chosen": 1.6215845346450806, "log_odds_ratio": -0.5784561634063721, "logits/chosen": 0.8371085524559021, "logits/rejected": 0.8662388324737549, "logps/chosen": -3.441236972808838, "logps/rejected": -5.0097527503967285, "loss": 0.8024, "nll_loss": 0.7445422410964966, "rewards/accuracies": 0.875, "rewards/chosen": -0.3441236913204193, "rewards/margins": 0.15685158967971802, "rewards/rejected": -0.5009752511978149, "step": 2399 }, { "epoch": 6.570841889117043, "grad_norm": 2.4841177463531494, "learning_rate": 6.712328767123287e-07, "log_odds_chosen": 1.757324457168579, "log_odds_ratio": -0.30340808629989624, "logits/chosen": 0.6276211738586426, "logits/rejected": 0.5690183639526367, "logps/chosen": -1.8007497787475586, "logps/rejected": -3.3836567401885986, "loss": 0.7209, "nll_loss": 0.6905797719955444, "rewards/accuracies": 1.0, "rewards/chosen": -0.18007497489452362, "rewards/margins": 0.15829071402549744, "rewards/rejected": -0.33836570382118225, "step": 2400 }, { "epoch": 6.573579739904175, "grad_norm": 3.315335988998413, "learning_rate": 6.710958904109588e-07, "log_odds_chosen": 1.5323915481567383, "log_odds_ratio": -0.31594061851501465, "logits/chosen": 0.9130065441131592, "logits/rejected": 0.901713490486145, "logps/chosen": -2.924346446990967, "logps/rejected": -4.357027530670166, "loss": 0.7188, "nll_loss": 0.6872302889823914, "rewards/accuracies": 1.0, "rewards/chosen": -0.2924346625804901, "rewards/margins": 0.14326812326908112, "rewards/rejected": -0.43570277094841003, "step": 2401 }, { "epoch": 6.576317590691307, "grad_norm": 2.880753993988037, "learning_rate": 6.70958904109589e-07, "log_odds_chosen": 1.5689595937728882, "log_odds_ratio": -0.41216808557510376, "logits/chosen": 0.6537371873855591, "logits/rejected": 0.5875782370567322, "logps/chosen": -1.7416067123413086, "logps/rejected": -3.19932222366333, "loss": 0.7648, "nll_loss": 0.7236106395721436, "rewards/accuracies": 0.875, "rewards/chosen": -0.1741606891155243, "rewards/margins": 0.1457715630531311, "rewards/rejected": -0.3199322521686554, "step": 2402 }, { "epoch": 6.57905544147844, "grad_norm": 3.2039759159088135, "learning_rate": 6.708219178082191e-07, "log_odds_chosen": 2.346686840057373, "log_odds_ratio": -0.19325338304042816, "logits/chosen": 0.9892191290855408, "logits/rejected": 1.0055962800979614, "logps/chosen": -2.073464870452881, "logps/rejected": -4.32318115234375, "loss": 0.6912, "nll_loss": 0.6719119548797607, "rewards/accuracies": 1.0, "rewards/chosen": -0.20734649896621704, "rewards/margins": 0.22497157752513885, "rewards/rejected": -0.4323180615901947, "step": 2403 }, { "epoch": 6.581793292265571, "grad_norm": 3.8275163173675537, "learning_rate": 6.706849315068492e-07, "log_odds_chosen": 0.630590558052063, "log_odds_ratio": -0.5092762112617493, "logits/chosen": 0.6569251418113708, "logits/rejected": 0.596856415271759, "logps/chosen": -1.3691247701644897, "logps/rejected": -1.86282217502594, "loss": 0.7596, "nll_loss": 0.7086926102638245, "rewards/accuracies": 0.875, "rewards/chosen": -0.1369124799966812, "rewards/margins": 0.04936974495649338, "rewards/rejected": -0.186282217502594, "step": 2404 }, { "epoch": 6.584531143052704, "grad_norm": 4.117195129394531, "learning_rate": 6.705479452054794e-07, "log_odds_chosen": 1.7268668413162231, "log_odds_ratio": -0.42896169424057007, "logits/chosen": 0.7250764966011047, "logits/rejected": 0.6891130805015564, "logps/chosen": -2.5412373542785645, "logps/rejected": -4.127743721008301, "loss": 0.727, "nll_loss": 0.6841372847557068, "rewards/accuracies": 0.875, "rewards/chosen": -0.2541237473487854, "rewards/margins": 0.15865063667297363, "rewards/rejected": -0.41277438402175903, "step": 2405 }, { "epoch": 6.587268993839836, "grad_norm": 3.1870903968811035, "learning_rate": 6.704109589041096e-07, "log_odds_chosen": 2.224849224090576, "log_odds_ratio": -0.37474918365478516, "logits/chosen": 0.7244623899459839, "logits/rejected": 0.7209422588348389, "logps/chosen": -1.838988184928894, "logps/rejected": -3.872835159301758, "loss": 0.806, "nll_loss": 0.7684999108314514, "rewards/accuracies": 0.75, "rewards/chosen": -0.18389880657196045, "rewards/margins": 0.20338471233844757, "rewards/rejected": -0.3872835338115692, "step": 2406 }, { "epoch": 6.590006844626968, "grad_norm": 2.934656858444214, "learning_rate": 6.702739726027396e-07, "log_odds_chosen": 3.020076274871826, "log_odds_ratio": -0.32755106687545776, "logits/chosen": 0.7924326658248901, "logits/rejected": 0.8009899854660034, "logps/chosen": -2.253398895263672, "logps/rejected": -5.1384477615356445, "loss": 0.7344, "nll_loss": 0.7016478776931763, "rewards/accuracies": 0.875, "rewards/chosen": -0.22533990442752838, "rewards/margins": 0.2885048985481262, "rewards/rejected": -0.5138448476791382, "step": 2407 }, { "epoch": 6.5927446954141, "grad_norm": 3.522547960281372, "learning_rate": 6.701369863013698e-07, "log_odds_chosen": 2.5027599334716797, "log_odds_ratio": -0.2279278039932251, "logits/chosen": 0.6841585636138916, "logits/rejected": 0.6204915046691895, "logps/chosen": -1.450577974319458, "logps/rejected": -3.656039237976074, "loss": 0.7378, "nll_loss": 0.7150443196296692, "rewards/accuracies": 1.0, "rewards/chosen": -0.1450577974319458, "rewards/margins": 0.22054612636566162, "rewards/rejected": -0.3656039237976074, "step": 2408 }, { "epoch": 6.595482546201232, "grad_norm": 2.953760862350464, "learning_rate": 6.7e-07, "log_odds_chosen": 1.6649422645568848, "log_odds_ratio": -0.2612324655056, "logits/chosen": 0.69368577003479, "logits/rejected": 0.6145135164260864, "logps/chosen": -1.9519984722137451, "logps/rejected": -3.441251039505005, "loss": 0.7195, "nll_loss": 0.6933390498161316, "rewards/accuracies": 0.875, "rewards/chosen": -0.1951998472213745, "rewards/margins": 0.1489252895116806, "rewards/rejected": -0.3441251218318939, "step": 2409 }, { "epoch": 6.598220396988364, "grad_norm": 2.9892947673797607, "learning_rate": 6.6986301369863e-07, "log_odds_chosen": 3.494497299194336, "log_odds_ratio": -0.06679002940654755, "logits/chosen": 1.010526418685913, "logits/rejected": 1.0533154010772705, "logps/chosen": -2.181896209716797, "logps/rejected": -5.4611005783081055, "loss": 0.6899, "nll_loss": 0.6832014918327332, "rewards/accuracies": 1.0, "rewards/chosen": -0.21818962693214417, "rewards/margins": 0.32792043685913086, "rewards/rejected": -0.5461100339889526, "step": 2410 }, { "epoch": 6.6009582477754964, "grad_norm": 3.4827141761779785, "learning_rate": 6.697260273972602e-07, "log_odds_chosen": 2.974282741546631, "log_odds_ratio": -0.41283056139945984, "logits/chosen": 0.6457453966140747, "logits/rejected": 0.6634211540222168, "logps/chosen": -1.8921151161193848, "logps/rejected": -4.77718448638916, "loss": 0.7354, "nll_loss": 0.6941345930099487, "rewards/accuracies": 0.75, "rewards/chosen": -0.18921151757240295, "rewards/margins": 0.28850695490837097, "rewards/rejected": -0.4777184724807739, "step": 2411 }, { "epoch": 6.6036960985626285, "grad_norm": 3.1852660179138184, "learning_rate": 6.695890410958904e-07, "log_odds_chosen": 1.3264071941375732, "log_odds_ratio": -0.4077224135398865, "logits/chosen": 0.7703088521957397, "logits/rejected": 0.7217056751251221, "logps/chosen": -1.6650322675704956, "logps/rejected": -2.779944896697998, "loss": 0.7295, "nll_loss": 0.6887298226356506, "rewards/accuracies": 1.0, "rewards/chosen": -0.16650323569774628, "rewards/margins": 0.11149129271507263, "rewards/rejected": -0.2779945135116577, "step": 2412 }, { "epoch": 6.606433949349761, "grad_norm": 3.7793900966644287, "learning_rate": 6.694520547945204e-07, "log_odds_chosen": 2.346061944961548, "log_odds_ratio": -0.3242342472076416, "logits/chosen": 0.8916275501251221, "logits/rejected": 0.924723744392395, "logps/chosen": -2.750615119934082, "logps/rejected": -4.978735446929932, "loss": 0.8317, "nll_loss": 0.7992470264434814, "rewards/accuracies": 0.875, "rewards/chosen": -0.27506154775619507, "rewards/margins": 0.22281202673912048, "rewards/rejected": -0.49787354469299316, "step": 2413 }, { "epoch": 6.609171800136893, "grad_norm": 2.828859806060791, "learning_rate": 6.693150684931506e-07, "log_odds_chosen": 2.9331510066986084, "log_odds_ratio": -0.17791366577148438, "logits/chosen": 0.5414913892745972, "logits/rejected": 0.4896962642669678, "logps/chosen": -1.7878386974334717, "logps/rejected": -4.532386779785156, "loss": 0.7213, "nll_loss": 0.7034964561462402, "rewards/accuracies": 1.0, "rewards/chosen": -0.1787838637828827, "rewards/margins": 0.2744547724723816, "rewards/rejected": -0.4532386362552643, "step": 2414 }, { "epoch": 6.611909650924025, "grad_norm": 2.2994778156280518, "learning_rate": 6.691780821917809e-07, "log_odds_chosen": 4.040127754211426, "log_odds_ratio": -0.17186114192008972, "logits/chosen": 0.8021441102027893, "logits/rejected": 0.7916762232780457, "logps/chosen": -1.8044260740280151, "logps/rejected": -5.611913681030273, "loss": 0.7454, "nll_loss": 0.7281911373138428, "rewards/accuracies": 1.0, "rewards/chosen": -0.18044260144233704, "rewards/margins": 0.38074880838394165, "rewards/rejected": -0.5611914396286011, "step": 2415 }, { "epoch": 6.614647501711157, "grad_norm": 4.851619720458984, "learning_rate": 6.69041095890411e-07, "log_odds_chosen": 2.1908817291259766, "log_odds_ratio": -0.5374298691749573, "logits/chosen": 0.891149640083313, "logits/rejected": 0.9228294491767883, "logps/chosen": -2.664961099624634, "logps/rejected": -4.74977445602417, "loss": 0.7927, "nll_loss": 0.7389334440231323, "rewards/accuracies": 0.75, "rewards/chosen": -0.26649612188339233, "rewards/margins": 0.20848135650157928, "rewards/rejected": -0.4749774634838104, "step": 2416 }, { "epoch": 6.617385352498289, "grad_norm": 2.6116411685943604, "learning_rate": 6.68904109589041e-07, "log_odds_chosen": 3.1069071292877197, "log_odds_ratio": -0.19219914078712463, "logits/chosen": 0.8958245515823364, "logits/rejected": 0.8950178623199463, "logps/chosen": -1.8613801002502441, "logps/rejected": -4.816845417022705, "loss": 0.7093, "nll_loss": 0.690038800239563, "rewards/accuracies": 1.0, "rewards/chosen": -0.18613803386688232, "rewards/margins": 0.2955465614795685, "rewards/rejected": -0.4816845655441284, "step": 2417 }, { "epoch": 6.620123203285421, "grad_norm": 3.071051597595215, "learning_rate": 6.687671232876711e-07, "log_odds_chosen": 2.142592430114746, "log_odds_ratio": -0.205455020070076, "logits/chosen": 0.804784893989563, "logits/rejected": 0.773382306098938, "logps/chosen": -1.9152672290802002, "logps/rejected": -3.912926435470581, "loss": 0.6794, "nll_loss": 0.6588695049285889, "rewards/accuracies": 1.0, "rewards/chosen": -0.19152672588825226, "rewards/margins": 0.19976595044136047, "rewards/rejected": -0.3912926912307739, "step": 2418 }, { "epoch": 6.622861054072553, "grad_norm": 3.135457754135132, "learning_rate": 6.686301369863014e-07, "log_odds_chosen": 1.5262417793273926, "log_odds_ratio": -0.3550410568714142, "logits/chosen": 0.9004617929458618, "logits/rejected": 0.9250174164772034, "logps/chosen": -1.9428653717041016, "logps/rejected": -3.359802484512329, "loss": 0.7636, "nll_loss": 0.7280914783477783, "rewards/accuracies": 0.875, "rewards/chosen": -0.1942865252494812, "rewards/margins": 0.14169371128082275, "rewards/rejected": -0.33598023653030396, "step": 2419 }, { "epoch": 6.625598904859685, "grad_norm": 3.1950953006744385, "learning_rate": 6.684931506849316e-07, "log_odds_chosen": 2.9338483810424805, "log_odds_ratio": -0.23104330897331238, "logits/chosen": 0.9944052696228027, "logits/rejected": 0.994743287563324, "logps/chosen": -2.1492199897766113, "logps/rejected": -4.9154582023620605, "loss": 0.684, "nll_loss": 0.6608893871307373, "rewards/accuracies": 0.875, "rewards/chosen": -0.2149220108985901, "rewards/margins": 0.27662378549575806, "rewards/rejected": -0.49154579639434814, "step": 2420 }, { "epoch": 6.628336755646817, "grad_norm": 5.843115329742432, "learning_rate": 6.683561643835616e-07, "log_odds_chosen": 2.1471688747406006, "log_odds_ratio": -0.42632415890693665, "logits/chosen": 0.880316972732544, "logits/rejected": 0.8828306794166565, "logps/chosen": -2.568216562271118, "logps/rejected": -4.576719284057617, "loss": 0.6925, "nll_loss": 0.6498661637306213, "rewards/accuracies": 0.875, "rewards/chosen": -0.2568216323852539, "rewards/margins": 0.200850248336792, "rewards/rejected": -0.4576719403266907, "step": 2421 }, { "epoch": 6.631074606433949, "grad_norm": 2.8670732975006104, "learning_rate": 6.682191780821918e-07, "log_odds_chosen": 3.9586544036865234, "log_odds_ratio": -0.07793545722961426, "logits/chosen": 0.9515795111656189, "logits/rejected": 0.9143315553665161, "logps/chosen": -1.6151657104492188, "logps/rejected": -5.366919994354248, "loss": 0.6463, "nll_loss": 0.6384984850883484, "rewards/accuracies": 1.0, "rewards/chosen": -0.16151657700538635, "rewards/margins": 0.375175416469574, "rewards/rejected": -0.5366920232772827, "step": 2422 }, { "epoch": 6.633812457221081, "grad_norm": 3.8985159397125244, "learning_rate": 6.68082191780822e-07, "log_odds_chosen": 1.3315120935440063, "log_odds_ratio": -0.40851080417633057, "logits/chosen": 0.9382573366165161, "logits/rejected": 0.9438766837120056, "logps/chosen": -1.6168702840805054, "logps/rejected": -2.8526792526245117, "loss": 0.6182, "nll_loss": 0.5773183107376099, "rewards/accuracies": 0.75, "rewards/chosen": -0.16168703138828278, "rewards/margins": 0.12358089536428452, "rewards/rejected": -0.2852679193019867, "step": 2423 }, { "epoch": 6.636550308008213, "grad_norm": 4.3992156982421875, "learning_rate": 6.67945205479452e-07, "log_odds_chosen": 1.468936800956726, "log_odds_ratio": -0.37136349081993103, "logits/chosen": 0.7477658987045288, "logits/rejected": 0.710664689540863, "logps/chosen": -2.6649959087371826, "logps/rejected": -4.035760402679443, "loss": 0.8331, "nll_loss": 0.7960013151168823, "rewards/accuracies": 0.875, "rewards/chosen": -0.2664995789527893, "rewards/margins": 0.13707642257213593, "rewards/rejected": -0.4035760164260864, "step": 2424 }, { "epoch": 6.639288158795345, "grad_norm": 2.5370595455169678, "learning_rate": 6.678082191780822e-07, "log_odds_chosen": 2.6999645233154297, "log_odds_ratio": -0.2725048065185547, "logits/chosen": 1.108262538909912, "logits/rejected": 1.1345210075378418, "logps/chosen": -1.9502456188201904, "logps/rejected": -4.553796768188477, "loss": 0.6696, "nll_loss": 0.6423393487930298, "rewards/accuracies": 0.875, "rewards/chosen": -0.1950245499610901, "rewards/margins": 0.2603551149368286, "rewards/rejected": -0.4553796648979187, "step": 2425 }, { "epoch": 6.6420260095824775, "grad_norm": 3.5392251014709473, "learning_rate": 6.676712328767124e-07, "log_odds_chosen": 2.097611665725708, "log_odds_ratio": -0.359822541475296, "logits/chosen": 0.8054007291793823, "logits/rejected": 0.776762068271637, "logps/chosen": -2.188293933868408, "logps/rejected": -4.208587646484375, "loss": 0.8269, "nll_loss": 0.7909449338912964, "rewards/accuracies": 0.75, "rewards/chosen": -0.21882939338684082, "rewards/margins": 0.20202942192554474, "rewards/rejected": -0.42085880041122437, "step": 2426 }, { "epoch": 6.6447638603696095, "grad_norm": 2.964918613433838, "learning_rate": 6.675342465753424e-07, "log_odds_chosen": 1.7363402843475342, "log_odds_ratio": -0.2604181468486786, "logits/chosen": 0.8940118551254272, "logits/rejected": 0.8911531567573547, "logps/chosen": -2.1079983711242676, "logps/rejected": -3.582193613052368, "loss": 0.6975, "nll_loss": 0.6714987754821777, "rewards/accuracies": 0.875, "rewards/chosen": -0.21079984307289124, "rewards/margins": 0.1474195271730423, "rewards/rejected": -0.35821935534477234, "step": 2427 }, { "epoch": 6.647501711156742, "grad_norm": 5.222742080688477, "learning_rate": 6.673972602739726e-07, "log_odds_chosen": 2.4346086978912354, "log_odds_ratio": -0.45313310623168945, "logits/chosen": 0.9144157767295837, "logits/rejected": 0.9164229035377502, "logps/chosen": -2.2647345066070557, "logps/rejected": -4.468242168426514, "loss": 0.7437, "nll_loss": 0.6983817219734192, "rewards/accuracies": 0.875, "rewards/chosen": -0.22647346556186676, "rewards/margins": 0.22035075724124908, "rewards/rejected": -0.44682422280311584, "step": 2428 }, { "epoch": 6.650239561943874, "grad_norm": 2.6889052391052246, "learning_rate": 6.672602739726028e-07, "log_odds_chosen": 3.6490087509155273, "log_odds_ratio": -0.12688657641410828, "logits/chosen": 0.631524920463562, "logits/rejected": 0.4915340542793274, "logps/chosen": -2.402723789215088, "logps/rejected": -5.873114109039307, "loss": 0.7285, "nll_loss": 0.7158204317092896, "rewards/accuracies": 1.0, "rewards/chosen": -0.2402723729610443, "rewards/margins": 0.34703904390335083, "rewards/rejected": -0.5873114466667175, "step": 2429 }, { "epoch": 6.652977412731007, "grad_norm": 2.8054423332214355, "learning_rate": 6.671232876712329e-07, "log_odds_chosen": 2.400007724761963, "log_odds_ratio": -0.19456332921981812, "logits/chosen": 0.9086371660232544, "logits/rejected": 0.9073842763900757, "logps/chosen": -1.9576034545898438, "logps/rejected": -4.201070785522461, "loss": 0.638, "nll_loss": 0.6185868978500366, "rewards/accuracies": 0.875, "rewards/chosen": -0.1957603543996811, "rewards/margins": 0.22434674203395844, "rewards/rejected": -0.42010706663131714, "step": 2430 }, { "epoch": 6.655715263518138, "grad_norm": 2.775730848312378, "learning_rate": 6.66986301369863e-07, "log_odds_chosen": 2.4920926094055176, "log_odds_ratio": -0.27612069249153137, "logits/chosen": 0.719650387763977, "logits/rejected": 0.6513946056365967, "logps/chosen": -1.829270601272583, "logps/rejected": -4.168722629547119, "loss": 0.6709, "nll_loss": 0.643315315246582, "rewards/accuracies": 0.875, "rewards/chosen": -0.18292705714702606, "rewards/margins": 0.23394522070884705, "rewards/rejected": -0.4168722927570343, "step": 2431 }, { "epoch": 6.658453114305271, "grad_norm": 2.501943349838257, "learning_rate": 6.668493150684931e-07, "log_odds_chosen": 2.7647457122802734, "log_odds_ratio": -0.2746422588825226, "logits/chosen": 0.8442414999008179, "logits/rejected": 0.7851029634475708, "logps/chosen": -1.6944793462753296, "logps/rejected": -4.277644157409668, "loss": 0.7651, "nll_loss": 0.7376442551612854, "rewards/accuracies": 1.0, "rewards/chosen": -0.16944792866706848, "rewards/margins": 0.2583164572715759, "rewards/rejected": -0.4277644157409668, "step": 2432 }, { "epoch": 6.661190965092403, "grad_norm": 3.039511203765869, "learning_rate": 6.667123287671233e-07, "log_odds_chosen": 2.658658027648926, "log_odds_ratio": -0.2716897428035736, "logits/chosen": 0.5565438270568848, "logits/rejected": 0.5342966318130493, "logps/chosen": -1.9232251644134521, "logps/rejected": -4.450353145599365, "loss": 0.6946, "nll_loss": 0.6673951745033264, "rewards/accuracies": 1.0, "rewards/chosen": -0.1923225075006485, "rewards/margins": 0.25271284580230713, "rewards/rejected": -0.44503533840179443, "step": 2433 }, { "epoch": 6.663928815879535, "grad_norm": 3.606257915496826, "learning_rate": 6.665753424657534e-07, "log_odds_chosen": 1.4402527809143066, "log_odds_ratio": -0.40103083848953247, "logits/chosen": 0.7042844295501709, "logits/rejected": 0.684133768081665, "logps/chosen": -2.486269950866699, "logps/rejected": -3.8900110721588135, "loss": 0.8013, "nll_loss": 0.7611526846885681, "rewards/accuracies": 0.625, "rewards/chosen": -0.24862700700759888, "rewards/margins": 0.1403741091489792, "rewards/rejected": -0.38900110125541687, "step": 2434 }, { "epoch": 6.666666666666667, "grad_norm": 3.381812334060669, "learning_rate": 6.664383561643835e-07, "log_odds_chosen": 1.437370777130127, "log_odds_ratio": -0.3660864233970642, "logits/chosen": 0.757946252822876, "logits/rejected": 0.6745670437812805, "logps/chosen": -1.725494623184204, "logps/rejected": -2.8901443481445312, "loss": 0.7578, "nll_loss": 0.72120201587677, "rewards/accuracies": 0.875, "rewards/chosen": -0.17254945635795593, "rewards/margins": 0.11646498739719391, "rewards/rejected": -0.28901445865631104, "step": 2435 }, { "epoch": 6.669404517453799, "grad_norm": 2.5465500354766846, "learning_rate": 6.663013698630137e-07, "log_odds_chosen": 1.2630000114440918, "log_odds_ratio": -0.31601083278656006, "logits/chosen": 0.9765162467956543, "logits/rejected": 0.9434084892272949, "logps/chosen": -1.418982982635498, "logps/rejected": -2.5061378479003906, "loss": 0.6388, "nll_loss": 0.6072386503219604, "rewards/accuracies": 1.0, "rewards/chosen": -0.1418982893228531, "rewards/margins": 0.1087154895067215, "rewards/rejected": -0.2506137788295746, "step": 2436 }, { "epoch": 6.672142368240931, "grad_norm": 3.4389419555664062, "learning_rate": 6.661643835616439e-07, "log_odds_chosen": 3.4213762283325195, "log_odds_ratio": -0.17952054738998413, "logits/chosen": 0.9390571117401123, "logits/rejected": 0.9510698318481445, "logps/chosen": -1.9190788269042969, "logps/rejected": -5.070395469665527, "loss": 0.7317, "nll_loss": 0.7137657999992371, "rewards/accuracies": 1.0, "rewards/chosen": -0.1919078826904297, "rewards/margins": 0.31513163447380066, "rewards/rejected": -0.5070395469665527, "step": 2437 }, { "epoch": 6.674880219028063, "grad_norm": 3.2482738494873047, "learning_rate": 6.660273972602739e-07, "log_odds_chosen": 3.081437110900879, "log_odds_ratio": -0.24002589285373688, "logits/chosen": 0.8736245036125183, "logits/rejected": 0.9347043037414551, "logps/chosen": -2.5038528442382812, "logps/rejected": -5.486357688903809, "loss": 0.7686, "nll_loss": 0.7445512413978577, "rewards/accuracies": 0.875, "rewards/chosen": -0.2503853142261505, "rewards/margins": 0.2982504963874817, "rewards/rejected": -0.5486357808113098, "step": 2438 }, { "epoch": 6.677618069815195, "grad_norm": 3.9530837535858154, "learning_rate": 6.658904109589041e-07, "log_odds_chosen": 4.064805507659912, "log_odds_ratio": -0.1522197425365448, "logits/chosen": 1.1588342189788818, "logits/rejected": 1.2357287406921387, "logps/chosen": -3.4193832874298096, "logps/rejected": -7.426067352294922, "loss": 0.7332, "nll_loss": 0.7180145978927612, "rewards/accuracies": 0.875, "rewards/chosen": -0.3419383466243744, "rewards/margins": 0.4006684124469757, "rewards/rejected": -0.7426067590713501, "step": 2439 }, { "epoch": 6.680355920602327, "grad_norm": 2.8209192752838135, "learning_rate": 6.657534246575343e-07, "log_odds_chosen": 2.373302459716797, "log_odds_ratio": -0.2981939911842346, "logits/chosen": 0.8000695705413818, "logits/rejected": 0.789244532585144, "logps/chosen": -2.218904733657837, "logps/rejected": -4.450793266296387, "loss": 0.7028, "nll_loss": 0.6729410886764526, "rewards/accuracies": 0.75, "rewards/chosen": -0.22189047932624817, "rewards/margins": 0.22318881750106812, "rewards/rejected": -0.44507932662963867, "step": 2440 }, { "epoch": 6.683093771389459, "grad_norm": 3.3448803424835205, "learning_rate": 6.656164383561643e-07, "log_odds_chosen": 1.702319622039795, "log_odds_ratio": -0.3069691061973572, "logits/chosen": 0.7761984467506409, "logits/rejected": 0.7262980341911316, "logps/chosen": -2.6673147678375244, "logps/rejected": -4.290341854095459, "loss": 0.8764, "nll_loss": 0.8457471132278442, "rewards/accuracies": 1.0, "rewards/chosen": -0.26673147082328796, "rewards/margins": 0.16230271756649017, "rewards/rejected": -0.42903420329093933, "step": 2441 }, { "epoch": 6.685831622176591, "grad_norm": 2.826331615447998, "learning_rate": 6.654794520547945e-07, "log_odds_chosen": 2.2256762981414795, "log_odds_ratio": -0.3106287121772766, "logits/chosen": 0.8932005167007446, "logits/rejected": 0.9061968326568604, "logps/chosen": -2.006495952606201, "logps/rejected": -4.14293909072876, "loss": 0.7043, "nll_loss": 0.6732456684112549, "rewards/accuracies": 1.0, "rewards/chosen": -0.20064961910247803, "rewards/margins": 0.21364428102970123, "rewards/rejected": -0.41429388523101807, "step": 2442 }, { "epoch": 6.6885694729637235, "grad_norm": 2.8390095233917236, "learning_rate": 6.653424657534247e-07, "log_odds_chosen": 1.0766663551330566, "log_odds_ratio": -0.4815065860748291, "logits/chosen": 0.8031033277511597, "logits/rejected": 0.7858366370201111, "logps/chosen": -2.0930280685424805, "logps/rejected": -3.0949881076812744, "loss": 0.714, "nll_loss": 0.6658250093460083, "rewards/accuracies": 0.75, "rewards/chosen": -0.20930281281471252, "rewards/margins": 0.1001959890127182, "rewards/rejected": -0.30949878692626953, "step": 2443 }, { "epoch": 6.691307323750856, "grad_norm": 2.7155630588531494, "learning_rate": 6.652054794520548e-07, "log_odds_chosen": 2.7502589225769043, "log_odds_ratio": -0.22348792850971222, "logits/chosen": 0.6272478103637695, "logits/rejected": 0.569347620010376, "logps/chosen": -1.7831149101257324, "logps/rejected": -4.26289176940918, "loss": 0.6837, "nll_loss": 0.6613345742225647, "rewards/accuracies": 1.0, "rewards/chosen": -0.17831149697303772, "rewards/margins": 0.24797770380973816, "rewards/rejected": -0.4262892007827759, "step": 2444 }, { "epoch": 6.694045174537988, "grad_norm": 2.453582286834717, "learning_rate": 6.650684931506849e-07, "log_odds_chosen": 2.299057960510254, "log_odds_ratio": -0.1928972750902176, "logits/chosen": 1.1723979711532593, "logits/rejected": 1.1162747144699097, "logps/chosen": -1.7030775547027588, "logps/rejected": -3.821484088897705, "loss": 0.6343, "nll_loss": 0.6150258183479309, "rewards/accuracies": 1.0, "rewards/chosen": -0.17030774056911469, "rewards/margins": 0.2118406593799591, "rewards/rejected": -0.3821483850479126, "step": 2445 }, { "epoch": 6.69678302532512, "grad_norm": 5.593602657318115, "learning_rate": 6.649315068493151e-07, "log_odds_chosen": 0.6100043058395386, "log_odds_ratio": -0.5864189863204956, "logits/chosen": 0.9884935617446899, "logits/rejected": 0.9802149534225464, "logps/chosen": -3.5583395957946777, "logps/rejected": -4.131694316864014, "loss": 0.7771, "nll_loss": 0.7184705138206482, "rewards/accuracies": 0.75, "rewards/chosen": -0.3558340072631836, "rewards/margins": 0.05733542516827583, "rewards/rejected": -0.41316941380500793, "step": 2446 }, { "epoch": 6.699520876112252, "grad_norm": 3.2894015312194824, "learning_rate": 6.647945205479452e-07, "log_odds_chosen": 1.1364529132843018, "log_odds_ratio": -0.3619588613510132, "logits/chosen": 0.8287647366523743, "logits/rejected": 0.8064543008804321, "logps/chosen": -1.7045248746871948, "logps/rejected": -2.705967426300049, "loss": 0.6917, "nll_loss": 0.6555033922195435, "rewards/accuracies": 1.0, "rewards/chosen": -0.17045250535011292, "rewards/margins": 0.10014423727989197, "rewards/rejected": -0.2705967426300049, "step": 2447 }, { "epoch": 6.702258726899384, "grad_norm": 3.9182801246643066, "learning_rate": 6.646575342465753e-07, "log_odds_chosen": 2.5735831260681152, "log_odds_ratio": -0.20220549404621124, "logits/chosen": 1.0096790790557861, "logits/rejected": 1.0117980241775513, "logps/chosen": -2.060300350189209, "logps/rejected": -4.473500728607178, "loss": 0.6464, "nll_loss": 0.6261336803436279, "rewards/accuracies": 1.0, "rewards/chosen": -0.20603004097938538, "rewards/margins": 0.24132002890110016, "rewards/rejected": -0.44735008478164673, "step": 2448 }, { "epoch": 6.704996577686516, "grad_norm": 2.611215353012085, "learning_rate": 6.645205479452054e-07, "log_odds_chosen": 1.5624302625656128, "log_odds_ratio": -0.3996468186378479, "logits/chosen": 0.775688648223877, "logits/rejected": 0.7621872425079346, "logps/chosen": -1.7838290929794312, "logps/rejected": -3.175109386444092, "loss": 0.7614, "nll_loss": 0.7214671969413757, "rewards/accuracies": 0.875, "rewards/chosen": -0.17838290333747864, "rewards/margins": 0.13912804424762726, "rewards/rejected": -0.3175109624862671, "step": 2449 }, { "epoch": 6.707734428473648, "grad_norm": 2.813760995864868, "learning_rate": 6.643835616438356e-07, "log_odds_chosen": 4.1732282638549805, "log_odds_ratio": -0.17072205245494843, "logits/chosen": 0.9167901873588562, "logits/rejected": 0.8688629865646362, "logps/chosen": -1.6763840913772583, "logps/rejected": -5.668366432189941, "loss": 0.6897, "nll_loss": 0.6726242303848267, "rewards/accuracies": 0.875, "rewards/chosen": -0.16763842105865479, "rewards/margins": 0.39919817447662354, "rewards/rejected": -0.5668365955352783, "step": 2450 }, { "epoch": 6.71047227926078, "grad_norm": 3.08735728263855, "learning_rate": 6.642465753424658e-07, "log_odds_chosen": 1.9847462177276611, "log_odds_ratio": -0.2610289454460144, "logits/chosen": 0.694624662399292, "logits/rejected": 0.6318553686141968, "logps/chosen": -1.8640456199645996, "logps/rejected": -3.6810524463653564, "loss": 0.7119, "nll_loss": 0.6858010292053223, "rewards/accuracies": 1.0, "rewards/chosen": -0.18640457093715668, "rewards/margins": 0.1817006766796112, "rewards/rejected": -0.3681052625179291, "step": 2451 }, { "epoch": 6.713210130047912, "grad_norm": 3.270911693572998, "learning_rate": 6.641095890410958e-07, "log_odds_chosen": 2.747896194458008, "log_odds_ratio": -0.27678823471069336, "logits/chosen": 0.6596713066101074, "logits/rejected": 0.632765531539917, "logps/chosen": -1.8796732425689697, "logps/rejected": -4.433444023132324, "loss": 0.723, "nll_loss": 0.6953566074371338, "rewards/accuracies": 1.0, "rewards/chosen": -0.18796733021736145, "rewards/margins": 0.25537702441215515, "rewards/rejected": -0.443344384431839, "step": 2452 }, { "epoch": 6.715947980835044, "grad_norm": 3.5369772911071777, "learning_rate": 6.63972602739726e-07, "log_odds_chosen": 3.074923276901245, "log_odds_ratio": -0.23384802043437958, "logits/chosen": 0.9120878577232361, "logits/rejected": 0.9375513792037964, "logps/chosen": -1.8578968048095703, "logps/rejected": -4.778036117553711, "loss": 0.657, "nll_loss": 0.6336272358894348, "rewards/accuracies": 1.0, "rewards/chosen": -0.18578968942165375, "rewards/margins": 0.292013943195343, "rewards/rejected": -0.47780364751815796, "step": 2453 }, { "epoch": 6.718685831622176, "grad_norm": 2.754502534866333, "learning_rate": 6.638356164383562e-07, "log_odds_chosen": 1.606427788734436, "log_odds_ratio": -0.26483121514320374, "logits/chosen": 0.5250872373580933, "logits/rejected": 0.46331867575645447, "logps/chosen": -1.6389694213867188, "logps/rejected": -3.056311845779419, "loss": 0.7509, "nll_loss": 0.7244360446929932, "rewards/accuracies": 1.0, "rewards/chosen": -0.16389694809913635, "rewards/margins": 0.14173422753810883, "rewards/rejected": -0.30563119053840637, "step": 2454 }, { "epoch": 6.721423682409308, "grad_norm": 4.2533721923828125, "learning_rate": 6.636986301369862e-07, "log_odds_chosen": 0.9904804229736328, "log_odds_ratio": -0.6082392930984497, "logits/chosen": 0.8706190586090088, "logits/rejected": 0.8292417526245117, "logps/chosen": -2.418391704559326, "logps/rejected": -3.329887628555298, "loss": 0.8228, "nll_loss": 0.7619796395301819, "rewards/accuracies": 0.75, "rewards/chosen": -0.24183915555477142, "rewards/margins": 0.09114963561296463, "rewards/rejected": -0.33298879861831665, "step": 2455 }, { "epoch": 6.72416153319644, "grad_norm": 2.59845232963562, "learning_rate": 6.635616438356164e-07, "log_odds_chosen": 3.0003175735473633, "log_odds_ratio": -0.1218128651380539, "logits/chosen": 1.0841047763824463, "logits/rejected": 1.0808414220809937, "logps/chosen": -2.1167309284210205, "logps/rejected": -4.986290454864502, "loss": 0.6253, "nll_loss": 0.6131153106689453, "rewards/accuracies": 1.0, "rewards/chosen": -0.2116730958223343, "rewards/margins": 0.28695595264434814, "rewards/rejected": -0.49862906336784363, "step": 2456 }, { "epoch": 6.726899383983573, "grad_norm": 2.7188124656677246, "learning_rate": 6.634246575342466e-07, "log_odds_chosen": 1.885664463043213, "log_odds_ratio": -0.24629275500774384, "logits/chosen": 0.7481838464736938, "logits/rejected": 0.7867511510848999, "logps/chosen": -1.8287278413772583, "logps/rejected": -3.5865230560302734, "loss": 0.7663, "nll_loss": 0.7417126297950745, "rewards/accuracies": 1.0, "rewards/chosen": -0.18287277221679688, "rewards/margins": 0.1757795512676239, "rewards/rejected": -0.3586523234844208, "step": 2457 }, { "epoch": 6.7296372347707045, "grad_norm": 4.759323596954346, "learning_rate": 6.632876712328767e-07, "log_odds_chosen": 1.2713242769241333, "log_odds_ratio": -0.5940951108932495, "logits/chosen": 0.8310662508010864, "logits/rejected": 0.8132606744766235, "logps/chosen": -2.1573195457458496, "logps/rejected": -3.311626672744751, "loss": 0.7682, "nll_loss": 0.7087783813476562, "rewards/accuracies": 0.75, "rewards/chosen": -0.21573194861412048, "rewards/margins": 0.11543071269989014, "rewards/rejected": -0.3311626613140106, "step": 2458 }, { "epoch": 6.7323750855578375, "grad_norm": 5.842931270599365, "learning_rate": 6.631506849315068e-07, "log_odds_chosen": 1.2516543865203857, "log_odds_ratio": -0.47695451974868774, "logits/chosen": 0.8869248628616333, "logits/rejected": 0.9064974784851074, "logps/chosen": -2.6400537490844727, "logps/rejected": -3.763075828552246, "loss": 0.7208, "nll_loss": 0.6731104850769043, "rewards/accuracies": 0.75, "rewards/chosen": -0.2640053629875183, "rewards/margins": 0.11230221390724182, "rewards/rejected": -0.3763076066970825, "step": 2459 }, { "epoch": 6.7351129363449695, "grad_norm": 3.735743284225464, "learning_rate": 6.63013698630137e-07, "log_odds_chosen": 1.8923676013946533, "log_odds_ratio": -0.2770099937915802, "logits/chosen": 0.7031247615814209, "logits/rejected": 0.7121658325195312, "logps/chosen": -2.335002899169922, "logps/rejected": -4.128253936767578, "loss": 0.7741, "nll_loss": 0.7463569641113281, "rewards/accuracies": 0.875, "rewards/chosen": -0.23350030183792114, "rewards/margins": 0.17932511866092682, "rewards/rejected": -0.41282540559768677, "step": 2460 }, { "epoch": 6.737850787132102, "grad_norm": 2.6915740966796875, "learning_rate": 6.628767123287671e-07, "log_odds_chosen": 2.712552070617676, "log_odds_ratio": -0.28587213158607483, "logits/chosen": 0.675426721572876, "logits/rejected": 0.6516842842102051, "logps/chosen": -2.262162446975708, "logps/rejected": -4.886159896850586, "loss": 0.7549, "nll_loss": 0.7262764573097229, "rewards/accuracies": 0.875, "rewards/chosen": -0.22621625661849976, "rewards/margins": 0.26239973306655884, "rewards/rejected": -0.4886159598827362, "step": 2461 }, { "epoch": 6.740588637919234, "grad_norm": 2.5396924018859863, "learning_rate": 6.627397260273972e-07, "log_odds_chosen": 1.3439384698867798, "log_odds_ratio": -0.2940804958343506, "logits/chosen": 0.6802051663398743, "logits/rejected": 0.5977036952972412, "logps/chosen": -1.6084164381027222, "logps/rejected": -2.784315586090088, "loss": 0.6317, "nll_loss": 0.6023169755935669, "rewards/accuracies": 1.0, "rewards/chosen": -0.16084164381027222, "rewards/margins": 0.11758994311094284, "rewards/rejected": -0.27843159437179565, "step": 2462 }, { "epoch": 6.743326488706366, "grad_norm": 2.8898861408233643, "learning_rate": 6.626027397260273e-07, "log_odds_chosen": 3.9261698722839355, "log_odds_ratio": -0.1521873027086258, "logits/chosen": 0.9082549810409546, "logits/rejected": 0.9037995338439941, "logps/chosen": -2.000544786453247, "logps/rejected": -5.682000160217285, "loss": 0.7352, "nll_loss": 0.7199923992156982, "rewards/accuracies": 1.0, "rewards/chosen": -0.20005446672439575, "rewards/margins": 0.368145614862442, "rewards/rejected": -0.5682001113891602, "step": 2463 }, { "epoch": 6.746064339493498, "grad_norm": 4.0287041664123535, "learning_rate": 6.624657534246575e-07, "log_odds_chosen": 0.8256477117538452, "log_odds_ratio": -0.4835037589073181, "logits/chosen": 0.7174561619758606, "logits/rejected": 0.7149085998535156, "logps/chosen": -2.4509339332580566, "logps/rejected": -3.135636329650879, "loss": 0.7091, "nll_loss": 0.6607891917228699, "rewards/accuracies": 0.75, "rewards/chosen": -0.24509340524673462, "rewards/margins": 0.06847023963928223, "rewards/rejected": -0.31356364488601685, "step": 2464 }, { "epoch": 6.74880219028063, "grad_norm": 2.7235679626464844, "learning_rate": 6.623287671232877e-07, "log_odds_chosen": 4.661779403686523, "log_odds_ratio": -0.10077346116304398, "logits/chosen": 1.1325029134750366, "logits/rejected": 1.1895307302474976, "logps/chosen": -1.8857977390289307, "logps/rejected": -6.3350911140441895, "loss": 0.6926, "nll_loss": 0.6825546622276306, "rewards/accuracies": 1.0, "rewards/chosen": -0.18857978284358978, "rewards/margins": 0.4449293613433838, "rewards/rejected": -0.6335091590881348, "step": 2465 }, { "epoch": 6.751540041067762, "grad_norm": 3.1403279304504395, "learning_rate": 6.621917808219177e-07, "log_odds_chosen": 2.334867477416992, "log_odds_ratio": -0.24729500710964203, "logits/chosen": 0.749152421951294, "logits/rejected": 0.7381592988967896, "logps/chosen": -2.0780081748962402, "logps/rejected": -4.24406099319458, "loss": 0.6536, "nll_loss": 0.6288820505142212, "rewards/accuracies": 1.0, "rewards/chosen": -0.20780080556869507, "rewards/margins": 0.2166053205728531, "rewards/rejected": -0.42440614104270935, "step": 2466 }, { "epoch": 6.754277891854894, "grad_norm": 3.362767457962036, "learning_rate": 6.620547945205479e-07, "log_odds_chosen": 1.6297484636306763, "log_odds_ratio": -0.3232977092266083, "logits/chosen": 0.64130699634552, "logits/rejected": 0.6187604665756226, "logps/chosen": -1.840040683746338, "logps/rejected": -3.2820324897766113, "loss": 0.7045, "nll_loss": 0.6721463203430176, "rewards/accuracies": 0.875, "rewards/chosen": -0.1840040683746338, "rewards/margins": 0.1441991925239563, "rewards/rejected": -0.3282032608985901, "step": 2467 }, { "epoch": 6.757015742642026, "grad_norm": 4.577317714691162, "learning_rate": 6.619178082191781e-07, "log_odds_chosen": 2.6624372005462646, "log_odds_ratio": -0.4820278286933899, "logits/chosen": 0.7284293174743652, "logits/rejected": 0.7796086668968201, "logps/chosen": -2.289677858352661, "logps/rejected": -4.847090244293213, "loss": 0.7339, "nll_loss": 0.6856569051742554, "rewards/accuracies": 0.75, "rewards/chosen": -0.22896775603294373, "rewards/margins": 0.2557412385940552, "rewards/rejected": -0.4847090244293213, "step": 2468 }, { "epoch": 6.759753593429158, "grad_norm": 7.648208141326904, "learning_rate": 6.617808219178081e-07, "log_odds_chosen": 1.145742416381836, "log_odds_ratio": -0.7650816440582275, "logits/chosen": 0.7982799410820007, "logits/rejected": 0.7571632862091064, "logps/chosen": -3.3844475746154785, "logps/rejected": -4.502557754516602, "loss": 0.8414, "nll_loss": 0.7648958563804626, "rewards/accuracies": 0.625, "rewards/chosen": -0.3384447693824768, "rewards/margins": 0.11181099712848663, "rewards/rejected": -0.45025575160980225, "step": 2469 }, { "epoch": 6.76249144421629, "grad_norm": 5.879176139831543, "learning_rate": 6.616438356164383e-07, "log_odds_chosen": 1.4037935733795166, "log_odds_ratio": -0.8992042541503906, "logits/chosen": 1.1120476722717285, "logits/rejected": 1.1584633588790894, "logps/chosen": -4.062463760375977, "logps/rejected": -5.405343055725098, "loss": 0.8076, "nll_loss": 0.7177203893661499, "rewards/accuracies": 0.75, "rewards/chosen": -0.4062464237213135, "rewards/margins": 0.13428792357444763, "rewards/rejected": -0.5405343174934387, "step": 2470 }, { "epoch": 6.765229295003422, "grad_norm": 2.7026288509368896, "learning_rate": 6.615068493150685e-07, "log_odds_chosen": 2.599433183670044, "log_odds_ratio": -0.11709301173686981, "logits/chosen": 1.005719542503357, "logits/rejected": 1.0131503343582153, "logps/chosen": -1.92341148853302, "logps/rejected": -4.260215759277344, "loss": 0.6504, "nll_loss": 0.6387302875518799, "rewards/accuracies": 1.0, "rewards/chosen": -0.192341148853302, "rewards/margins": 0.23368045687675476, "rewards/rejected": -0.4260215759277344, "step": 2471 }, { "epoch": 6.767967145790554, "grad_norm": 3.352879762649536, "learning_rate": 6.613698630136986e-07, "log_odds_chosen": 2.1258349418640137, "log_odds_ratio": -0.25689154863357544, "logits/chosen": 0.5080909729003906, "logits/rejected": 0.46067583560943604, "logps/chosen": -2.1025335788726807, "logps/rejected": -4.081202983856201, "loss": 0.7848, "nll_loss": 0.7591033577919006, "rewards/accuracies": 1.0, "rewards/chosen": -0.21025335788726807, "rewards/margins": 0.19786690175533295, "rewards/rejected": -0.4081202745437622, "step": 2472 }, { "epoch": 6.770704996577686, "grad_norm": 3.547182083129883, "learning_rate": 6.612328767123287e-07, "log_odds_chosen": 1.9505259990692139, "log_odds_ratio": -0.3948209881782532, "logits/chosen": 0.8184449672698975, "logits/rejected": 0.7904688119888306, "logps/chosen": -2.1308510303497314, "logps/rejected": -3.9066624641418457, "loss": 0.7409, "nll_loss": 0.7013989686965942, "rewards/accuracies": 0.75, "rewards/chosen": -0.2130851000547409, "rewards/margins": 0.17758114635944366, "rewards/rejected": -0.39066624641418457, "step": 2473 }, { "epoch": 6.7734428473648185, "grad_norm": 2.924255132675171, "learning_rate": 6.610958904109589e-07, "log_odds_chosen": 2.8005807399749756, "log_odds_ratio": -0.2524183690547943, "logits/chosen": 0.570492148399353, "logits/rejected": 0.5310969352722168, "logps/chosen": -1.954960584640503, "logps/rejected": -4.624561309814453, "loss": 0.7638, "nll_loss": 0.7385393381118774, "rewards/accuracies": 1.0, "rewards/chosen": -0.19549605250358582, "rewards/margins": 0.2669600248336792, "rewards/rejected": -0.4624560475349426, "step": 2474 }, { "epoch": 6.776180698151951, "grad_norm": 2.391237258911133, "learning_rate": 6.60958904109589e-07, "log_odds_chosen": 3.2120747566223145, "log_odds_ratio": -0.150839701294899, "logits/chosen": 0.7485238313674927, "logits/rejected": 0.7329709529876709, "logps/chosen": -2.526329517364502, "logps/rejected": -5.55869722366333, "loss": 0.6639, "nll_loss": 0.648857593536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.2526329457759857, "rewards/margins": 0.30323678255081177, "rewards/rejected": -0.5558697581291199, "step": 2475 }, { "epoch": 6.778918548939083, "grad_norm": 3.8069629669189453, "learning_rate": 6.608219178082191e-07, "log_odds_chosen": 1.4414995908737183, "log_odds_ratio": -0.7893455028533936, "logits/chosen": 1.0100690126419067, "logits/rejected": 1.0017446279525757, "logps/chosen": -2.5906848907470703, "logps/rejected": -3.861617088317871, "loss": 0.6822, "nll_loss": 0.6032365560531616, "rewards/accuracies": 0.875, "rewards/chosen": -0.25906848907470703, "rewards/margins": 0.12709324061870575, "rewards/rejected": -0.386161744594574, "step": 2476 }, { "epoch": 6.781656399726215, "grad_norm": 3.454641819000244, "learning_rate": 6.606849315068492e-07, "log_odds_chosen": 1.642258644104004, "log_odds_ratio": -0.25228893756866455, "logits/chosen": 0.8464173674583435, "logits/rejected": 0.897493839263916, "logps/chosen": -2.6563618183135986, "logps/rejected": -4.178448677062988, "loss": 0.7568, "nll_loss": 0.7316126823425293, "rewards/accuracies": 1.0, "rewards/chosen": -0.2656362056732178, "rewards/margins": 0.15220868587493896, "rewards/rejected": -0.41784486174583435, "step": 2477 }, { "epoch": 6.784394250513347, "grad_norm": 3.621373414993286, "learning_rate": 6.605479452054794e-07, "log_odds_chosen": 1.3204394578933716, "log_odds_ratio": -0.5587629079818726, "logits/chosen": 0.811011791229248, "logits/rejected": 0.8190826177597046, "logps/chosen": -2.181511878967285, "logps/rejected": -3.3669228553771973, "loss": 0.7715, "nll_loss": 0.7156316637992859, "rewards/accuracies": 0.875, "rewards/chosen": -0.21815121173858643, "rewards/margins": 0.11854107677936554, "rewards/rejected": -0.33669230341911316, "step": 2478 }, { "epoch": 6.787132101300479, "grad_norm": 3.1846067905426025, "learning_rate": 6.604109589041096e-07, "log_odds_chosen": 2.9783520698547363, "log_odds_ratio": -0.24895226955413818, "logits/chosen": 0.7576164603233337, "logits/rejected": 0.7029552459716797, "logps/chosen": -1.715887188911438, "logps/rejected": -4.52415132522583, "loss": 0.7629, "nll_loss": 0.7380216717720032, "rewards/accuracies": 1.0, "rewards/chosen": -0.1715887188911438, "rewards/margins": 0.2808264493942261, "rewards/rejected": -0.4524151682853699, "step": 2479 }, { "epoch": 6.789869952087611, "grad_norm": 4.4826202392578125, "learning_rate": 6.602739726027396e-07, "log_odds_chosen": 1.2925841808319092, "log_odds_ratio": -0.6946641206741333, "logits/chosen": 0.6809792518615723, "logits/rejected": 0.6670566201210022, "logps/chosen": -2.8415136337280273, "logps/rejected": -4.071351051330566, "loss": 0.7988, "nll_loss": 0.7293093800544739, "rewards/accuracies": 0.625, "rewards/chosen": -0.2841513752937317, "rewards/margins": 0.12298379093408585, "rewards/rejected": -0.40713512897491455, "step": 2480 }, { "epoch": 6.792607802874743, "grad_norm": 3.1931684017181396, "learning_rate": 6.601369863013698e-07, "log_odds_chosen": 2.7566945552825928, "log_odds_ratio": -0.37678441405296326, "logits/chosen": 0.5873897671699524, "logits/rejected": 0.45448586344718933, "logps/chosen": -1.6528918743133545, "logps/rejected": -4.2592973709106445, "loss": 0.7423, "nll_loss": 0.7046544551849365, "rewards/accuracies": 0.75, "rewards/chosen": -0.16528919339179993, "rewards/margins": 0.2606405019760132, "rewards/rejected": -0.4259296655654907, "step": 2481 }, { "epoch": 6.795345653661875, "grad_norm": 3.098940372467041, "learning_rate": 6.6e-07, "log_odds_chosen": 1.8818176984786987, "log_odds_ratio": -0.30579042434692383, "logits/chosen": 0.8370857238769531, "logits/rejected": 0.7887271046638489, "logps/chosen": -1.7137788534164429, "logps/rejected": -3.4342153072357178, "loss": 0.6381, "nll_loss": 0.6075380444526672, "rewards/accuracies": 1.0, "rewards/chosen": -0.17137788236141205, "rewards/margins": 0.17204366624355316, "rewards/rejected": -0.3434215784072876, "step": 2482 }, { "epoch": 6.798083504449007, "grad_norm": 3.062291145324707, "learning_rate": 6.5986301369863e-07, "log_odds_chosen": 1.185905933380127, "log_odds_ratio": -0.35026007890701294, "logits/chosen": 0.6545508503913879, "logits/rejected": 0.5766190886497498, "logps/chosen": -2.0202674865722656, "logps/rejected": -3.0858588218688965, "loss": 0.7347, "nll_loss": 0.6996901631355286, "rewards/accuracies": 0.875, "rewards/chosen": -0.20202675461769104, "rewards/margins": 0.10655912756919861, "rewards/rejected": -0.30858588218688965, "step": 2483 }, { "epoch": 6.80082135523614, "grad_norm": 3.0021955966949463, "learning_rate": 6.597260273972602e-07, "log_odds_chosen": 4.3335185050964355, "log_odds_ratio": -0.03833457827568054, "logits/chosen": 1.1242375373840332, "logits/rejected": 1.1517796516418457, "logps/chosen": -1.7736762762069702, "logps/rejected": -5.831196308135986, "loss": 0.6724, "nll_loss": 0.668599009513855, "rewards/accuracies": 1.0, "rewards/chosen": -0.17736762762069702, "rewards/margins": 0.4057520031929016, "rewards/rejected": -0.5831196308135986, "step": 2484 }, { "epoch": 6.803559206023271, "grad_norm": 4.002308368682861, "learning_rate": 6.595890410958904e-07, "log_odds_chosen": 2.673616886138916, "log_odds_ratio": -0.36345407366752625, "logits/chosen": 1.18773353099823, "logits/rejected": 1.2106366157531738, "logps/chosen": -2.746180534362793, "logps/rejected": -5.342740535736084, "loss": 0.7479, "nll_loss": 0.7115725874900818, "rewards/accuracies": 0.75, "rewards/chosen": -0.2746180593967438, "rewards/margins": 0.25965601205825806, "rewards/rejected": -0.5342740416526794, "step": 2485 }, { "epoch": 6.806297056810404, "grad_norm": 2.562051773071289, "learning_rate": 6.594520547945205e-07, "log_odds_chosen": 1.222330093383789, "log_odds_ratio": -0.3628101348876953, "logits/chosen": 0.8370973467826843, "logits/rejected": 0.7917003035545349, "logps/chosen": -1.6109750270843506, "logps/rejected": -2.643019914627075, "loss": 0.7143, "nll_loss": 0.6779829263687134, "rewards/accuracies": 0.875, "rewards/chosen": -0.16109749674797058, "rewards/margins": 0.10320448875427246, "rewards/rejected": -0.26430198550224304, "step": 2486 }, { "epoch": 6.809034907597536, "grad_norm": 5.17079496383667, "learning_rate": 6.593150684931506e-07, "log_odds_chosen": 1.636575698852539, "log_odds_ratio": -0.3580520749092102, "logits/chosen": 0.8241226673126221, "logits/rejected": 0.8587538003921509, "logps/chosen": -2.5799612998962402, "logps/rejected": -4.108816146850586, "loss": 0.6615, "nll_loss": 0.6256657242774963, "rewards/accuracies": 0.875, "rewards/chosen": -0.257996141910553, "rewards/margins": 0.15288548171520233, "rewards/rejected": -0.4108816385269165, "step": 2487 }, { "epoch": 6.811772758384668, "grad_norm": 6.031540870666504, "learning_rate": 6.591780821917808e-07, "log_odds_chosen": 1.1495141983032227, "log_odds_ratio": -0.5354097485542297, "logits/chosen": 0.5513201355934143, "logits/rejected": 0.5688292980194092, "logps/chosen": -2.3442893028259277, "logps/rejected": -3.3958921432495117, "loss": 0.7213, "nll_loss": 0.6677112579345703, "rewards/accuracies": 0.875, "rewards/chosen": -0.23442894220352173, "rewards/margins": 0.10516027361154556, "rewards/rejected": -0.3395892083644867, "step": 2488 }, { "epoch": 6.8145106091718, "grad_norm": 2.8402185440063477, "learning_rate": 6.590410958904109e-07, "log_odds_chosen": 1.6802979707717896, "log_odds_ratio": -0.2581382691860199, "logits/chosen": 0.6965848207473755, "logits/rejected": 0.721988320350647, "logps/chosen": -2.107684373855591, "logps/rejected": -3.670900344848633, "loss": 0.6731, "nll_loss": 0.6472820043563843, "rewards/accuracies": 1.0, "rewards/chosen": -0.2107684463262558, "rewards/margins": 0.15632160007953644, "rewards/rejected": -0.3670900762081146, "step": 2489 }, { "epoch": 6.8172484599589325, "grad_norm": 2.8048815727233887, "learning_rate": 6.58904109589041e-07, "log_odds_chosen": 2.4190948009490967, "log_odds_ratio": -0.1597537100315094, "logits/chosen": 0.6800726652145386, "logits/rejected": 0.6026278138160706, "logps/chosen": -1.7340221405029297, "logps/rejected": -3.928638458251953, "loss": 0.6595, "nll_loss": 0.6434974670410156, "rewards/accuracies": 1.0, "rewards/chosen": -0.17340219020843506, "rewards/margins": 0.2194616198539734, "rewards/rejected": -0.39286383986473083, "step": 2490 }, { "epoch": 6.8199863107460645, "grad_norm": 3.3754079341888428, "learning_rate": 6.587671232876712e-07, "log_odds_chosen": 1.619773030281067, "log_odds_ratio": -0.39828115701675415, "logits/chosen": 0.8273739814758301, "logits/rejected": 0.796544075012207, "logps/chosen": -2.106008768081665, "logps/rejected": -3.5964934825897217, "loss": 0.7063, "nll_loss": 0.666520893573761, "rewards/accuracies": 0.875, "rewards/chosen": -0.21060088276863098, "rewards/margins": 0.14904850721359253, "rewards/rejected": -0.3596493899822235, "step": 2491 }, { "epoch": 6.822724161533197, "grad_norm": 2.741575002670288, "learning_rate": 6.586301369863013e-07, "log_odds_chosen": 4.070067882537842, "log_odds_ratio": -0.14516833424568176, "logits/chosen": 0.8164722919464111, "logits/rejected": 0.7833511829376221, "logps/chosen": -2.2959542274475098, "logps/rejected": -6.234394550323486, "loss": 0.7646, "nll_loss": 0.7500370144844055, "rewards/accuracies": 1.0, "rewards/chosen": -0.22959543764591217, "rewards/margins": 0.39384400844573975, "rewards/rejected": -0.6234394311904907, "step": 2492 }, { "epoch": 6.825462012320329, "grad_norm": 3.063750982284546, "learning_rate": 6.584931506849315e-07, "log_odds_chosen": 1.420373558998108, "log_odds_ratio": -0.30807411670684814, "logits/chosen": 0.8388924598693848, "logits/rejected": 0.7735146284103394, "logps/chosen": -1.9427660703659058, "logps/rejected": -3.218204975128174, "loss": 0.7111, "nll_loss": 0.6802673935890198, "rewards/accuracies": 1.0, "rewards/chosen": -0.1942766010761261, "rewards/margins": 0.1275438815355301, "rewards/rejected": -0.3218204975128174, "step": 2493 }, { "epoch": 6.828199863107461, "grad_norm": 2.237532615661621, "learning_rate": 6.583561643835615e-07, "log_odds_chosen": 2.4743359088897705, "log_odds_ratio": -0.29884418845176697, "logits/chosen": 0.8286519646644592, "logits/rejected": 0.7868993282318115, "logps/chosen": -1.8693561553955078, "logps/rejected": -4.223078727722168, "loss": 0.6448, "nll_loss": 0.6148809194564819, "rewards/accuracies": 0.75, "rewards/chosen": -0.18693561851978302, "rewards/margins": 0.23537227511405945, "rewards/rejected": -0.4223078489303589, "step": 2494 }, { "epoch": 6.830937713894593, "grad_norm": 6.103456020355225, "learning_rate": 6.582191780821917e-07, "log_odds_chosen": 1.8832590579986572, "log_odds_ratio": -0.47477537393569946, "logits/chosen": 0.741182804107666, "logits/rejected": 0.67120361328125, "logps/chosen": -2.646674394607544, "logps/rejected": -4.456638336181641, "loss": 0.72, "nll_loss": 0.67255699634552, "rewards/accuracies": 0.875, "rewards/chosen": -0.26466745138168335, "rewards/margins": 0.1809963583946228, "rewards/rejected": -0.44566377997398376, "step": 2495 }, { "epoch": 6.833675564681725, "grad_norm": 4.59600305557251, "learning_rate": 6.58082191780822e-07, "log_odds_chosen": 0.9678802490234375, "log_odds_ratio": -0.3869345784187317, "logits/chosen": 0.7158900499343872, "logits/rejected": 0.6504107713699341, "logps/chosen": -1.927952527999878, "logps/rejected": -2.715106725692749, "loss": 0.6865, "nll_loss": 0.6478294134140015, "rewards/accuracies": 0.875, "rewards/chosen": -0.1927952617406845, "rewards/margins": 0.07871541380882263, "rewards/rejected": -0.27151066064834595, "step": 2496 }, { "epoch": 6.836413415468857, "grad_norm": 8.789246559143066, "learning_rate": 6.579452054794519e-07, "log_odds_chosen": 1.120739221572876, "log_odds_ratio": -0.7321817278862, "logits/chosen": 1.01481032371521, "logits/rejected": 1.0220355987548828, "logps/chosen": -2.4184622764587402, "logps/rejected": -3.273158311843872, "loss": 0.7618, "nll_loss": 0.6885885000228882, "rewards/accuracies": 0.75, "rewards/chosen": -0.2418462336063385, "rewards/margins": 0.08546962589025497, "rewards/rejected": -0.3273158669471741, "step": 2497 }, { "epoch": 6.839151266255989, "grad_norm": 3.6578609943389893, "learning_rate": 6.578082191780821e-07, "log_odds_chosen": 1.3079384565353394, "log_odds_ratio": -0.4379594922065735, "logits/chosen": 0.9507233500480652, "logits/rejected": 0.9365695714950562, "logps/chosen": -2.5090575218200684, "logps/rejected": -3.7513065338134766, "loss": 0.6826, "nll_loss": 0.63876873254776, "rewards/accuracies": 0.75, "rewards/chosen": -0.25090575218200684, "rewards/margins": 0.12422490119934082, "rewards/rejected": -0.37513065338134766, "step": 2498 }, { "epoch": 6.841889117043121, "grad_norm": 2.8450090885162354, "learning_rate": 6.576712328767124e-07, "log_odds_chosen": 2.8425002098083496, "log_odds_ratio": -0.23321451246738434, "logits/chosen": 0.6505528092384338, "logits/rejected": 0.6082317233085632, "logps/chosen": -1.8537721633911133, "logps/rejected": -4.533289432525635, "loss": 0.8007, "nll_loss": 0.777388334274292, "rewards/accuracies": 1.0, "rewards/chosen": -0.18537721037864685, "rewards/margins": 0.26795169711112976, "rewards/rejected": -0.453328937292099, "step": 2499 }, { "epoch": 6.844626967830253, "grad_norm": 3.006542444229126, "learning_rate": 6.575342465753423e-07, "log_odds_chosen": 2.415710926055908, "log_odds_ratio": -0.18129105865955353, "logits/chosen": 0.8117494583129883, "logits/rejected": 0.7226083874702454, "logps/chosen": -1.659414529800415, "logps/rejected": -3.893458843231201, "loss": 0.66, "nll_loss": 0.6419137120246887, "rewards/accuracies": 1.0, "rewards/chosen": -0.16594144701957703, "rewards/margins": 0.22340445220470428, "rewards/rejected": -0.3893459141254425, "step": 2500 }, { "epoch": 6.847364818617385, "grad_norm": 4.061727523803711, "learning_rate": 6.573972602739726e-07, "log_odds_chosen": 1.8922840356826782, "log_odds_ratio": -0.3089681565761566, "logits/chosen": 0.6835765242576599, "logits/rejected": 0.5840834975242615, "logps/chosen": -2.202375888824463, "logps/rejected": -3.930037498474121, "loss": 0.7903, "nll_loss": 0.7593818306922913, "rewards/accuracies": 0.875, "rewards/chosen": -0.2202375829219818, "rewards/margins": 0.17276613414287567, "rewards/rejected": -0.3930037021636963, "step": 2501 }, { "epoch": 6.850102669404517, "grad_norm": 4.442209243774414, "learning_rate": 6.572602739726028e-07, "log_odds_chosen": 1.9532219171524048, "log_odds_ratio": -0.5324340462684631, "logits/chosen": 0.6419814825057983, "logits/rejected": 0.5476927161216736, "logps/chosen": -2.602038860321045, "logps/rejected": -4.49874210357666, "loss": 0.9199, "nll_loss": 0.8666952848434448, "rewards/accuracies": 0.75, "rewards/chosen": -0.26020389795303345, "rewards/margins": 0.18967032432556152, "rewards/rejected": -0.4498741924762726, "step": 2502 }, { "epoch": 6.852840520191649, "grad_norm": 2.83870792388916, "learning_rate": 6.571232876712329e-07, "log_odds_chosen": 3.3461692333221436, "log_odds_ratio": -0.10057371109724045, "logits/chosen": 1.174149990081787, "logits/rejected": 1.1773103475570679, "logps/chosen": -2.305497884750366, "logps/rejected": -5.524487495422363, "loss": 0.6674, "nll_loss": 0.657322883605957, "rewards/accuracies": 1.0, "rewards/chosen": -0.23054978251457214, "rewards/margins": 0.3218989968299866, "rewards/rejected": -0.5524487495422363, "step": 2503 }, { "epoch": 6.855578370978781, "grad_norm": 2.62693190574646, "learning_rate": 6.56986301369863e-07, "log_odds_chosen": 1.3808977603912354, "log_odds_ratio": -0.27972087264060974, "logits/chosen": 0.7241038680076599, "logits/rejected": 0.6657155752182007, "logps/chosen": -1.4720890522003174, "logps/rejected": -2.667497158050537, "loss": 0.6454, "nll_loss": 0.6174256205558777, "rewards/accuracies": 1.0, "rewards/chosen": -0.14720889925956726, "rewards/margins": 0.11954084038734436, "rewards/rejected": -0.2667497396469116, "step": 2504 }, { "epoch": 6.8583162217659135, "grad_norm": 4.1811203956604, "learning_rate": 6.568493150684932e-07, "log_odds_chosen": 3.8383073806762695, "log_odds_ratio": -0.20960365235805511, "logits/chosen": 1.2601077556610107, "logits/rejected": 1.2690714597702026, "logps/chosen": -2.483417272567749, "logps/rejected": -6.2202863693237305, "loss": 0.701, "nll_loss": 0.6800411343574524, "rewards/accuracies": 0.875, "rewards/chosen": -0.24834173917770386, "rewards/margins": 0.37368693947792053, "rewards/rejected": -0.6220287084579468, "step": 2505 }, { "epoch": 6.861054072553046, "grad_norm": 2.8056447505950928, "learning_rate": 6.567123287671233e-07, "log_odds_chosen": 1.7841225862503052, "log_odds_ratio": -0.3564874529838562, "logits/chosen": 0.5560635924339294, "logits/rejected": 0.5252683758735657, "logps/chosen": -1.9250757694244385, "logps/rejected": -3.5601258277893066, "loss": 0.6854, "nll_loss": 0.6497544646263123, "rewards/accuracies": 1.0, "rewards/chosen": -0.1925075650215149, "rewards/margins": 0.16350501775741577, "rewards/rejected": -0.35601264238357544, "step": 2506 }, { "epoch": 6.863791923340178, "grad_norm": 9.604708671569824, "learning_rate": 6.565753424657535e-07, "log_odds_chosen": 2.207211494445801, "log_odds_ratio": -0.5215966105461121, "logits/chosen": 0.8187609314918518, "logits/rejected": 0.7330094575881958, "logps/chosen": -2.508782148361206, "logps/rejected": -4.564700603485107, "loss": 0.737, "nll_loss": 0.6848638653755188, "rewards/accuracies": 0.875, "rewards/chosen": -0.250878244638443, "rewards/margins": 0.2055918574333191, "rewards/rejected": -0.4564700722694397, "step": 2507 }, { "epoch": 6.86652977412731, "grad_norm": 4.603046894073486, "learning_rate": 6.564383561643835e-07, "log_odds_chosen": 3.0008692741394043, "log_odds_ratio": -0.36506104469299316, "logits/chosen": 1.2082985639572144, "logits/rejected": 1.2144725322723389, "logps/chosen": -2.362093210220337, "logps/rejected": -5.270532608032227, "loss": 0.6997, "nll_loss": 0.6631971597671509, "rewards/accuracies": 0.875, "rewards/chosen": -0.23620931804180145, "rewards/margins": 0.2908439040184021, "rewards/rejected": -0.5270532369613647, "step": 2508 }, { "epoch": 6.869267624914443, "grad_norm": 2.9062113761901855, "learning_rate": 6.563013698630137e-07, "log_odds_chosen": 2.7006053924560547, "log_odds_ratio": -0.2863336205482483, "logits/chosen": 0.8602246046066284, "logits/rejected": 0.8787091970443726, "logps/chosen": -2.1142234802246094, "logps/rejected": -4.709096908569336, "loss": 0.7181, "nll_loss": 0.689475417137146, "rewards/accuracies": 0.875, "rewards/chosen": -0.21142235398292542, "rewards/margins": 0.2594873011112213, "rewards/rejected": -0.4709096848964691, "step": 2509 }, { "epoch": 6.872005475701574, "grad_norm": 6.657435417175293, "learning_rate": 6.561643835616439e-07, "log_odds_chosen": 2.100156784057617, "log_odds_ratio": -0.43799683451652527, "logits/chosen": 0.5623872876167297, "logits/rejected": 0.5537285804748535, "logps/chosen": -2.964430332183838, "logps/rejected": -4.873084545135498, "loss": 0.7901, "nll_loss": 0.7463356256484985, "rewards/accuracies": 0.875, "rewards/chosen": -0.29644304513931274, "rewards/margins": 0.1908654421567917, "rewards/rejected": -0.48730847239494324, "step": 2510 }, { "epoch": 6.874743326488707, "grad_norm": 2.5925607681274414, "learning_rate": 6.560273972602739e-07, "log_odds_chosen": 2.7921199798583984, "log_odds_ratio": -0.2177276909351349, "logits/chosen": 0.7503076791763306, "logits/rejected": 0.7620216608047485, "logps/chosen": -2.257449150085449, "logps/rejected": -4.934175968170166, "loss": 0.689, "nll_loss": 0.6672225594520569, "rewards/accuracies": 1.0, "rewards/chosen": -0.22574490308761597, "rewards/margins": 0.26767271757125854, "rewards/rejected": -0.4934176206588745, "step": 2511 }, { "epoch": 6.877481177275839, "grad_norm": 3.7978718280792236, "learning_rate": 6.558904109589041e-07, "log_odds_chosen": 0.8752643465995789, "log_odds_ratio": -0.8270032405853271, "logits/chosen": 0.9567174315452576, "logits/rejected": 0.9757855534553528, "logps/chosen": -2.565849781036377, "logps/rejected": -3.3899314403533936, "loss": 0.6953, "nll_loss": 0.612564206123352, "rewards/accuracies": 0.625, "rewards/chosen": -0.2565850019454956, "rewards/margins": 0.08240818977355957, "rewards/rejected": -0.3389931619167328, "step": 2512 }, { "epoch": 6.880219028062971, "grad_norm": 3.2595865726470947, "learning_rate": 6.557534246575343e-07, "log_odds_chosen": 1.2490098476409912, "log_odds_ratio": -0.3100912570953369, "logits/chosen": 0.5878355503082275, "logits/rejected": 0.5488283038139343, "logps/chosen": -2.0955636501312256, "logps/rejected": -3.2387096881866455, "loss": 0.7894, "nll_loss": 0.7583986520767212, "rewards/accuracies": 1.0, "rewards/chosen": -0.20955635607242584, "rewards/margins": 0.11431462317705154, "rewards/rejected": -0.323870986700058, "step": 2513 }, { "epoch": 6.882956878850103, "grad_norm": 2.8252015113830566, "learning_rate": 6.556164383561643e-07, "log_odds_chosen": 1.6007366180419922, "log_odds_ratio": -0.23329702019691467, "logits/chosen": 0.8206494450569153, "logits/rejected": 0.7812072038650513, "logps/chosen": -2.1766180992126465, "logps/rejected": -3.645407199859619, "loss": 0.747, "nll_loss": 0.7236561179161072, "rewards/accuracies": 1.0, "rewards/chosen": -0.21766182780265808, "rewards/margins": 0.1468788981437683, "rewards/rejected": -0.3645407557487488, "step": 2514 }, { "epoch": 6.885694729637235, "grad_norm": 2.8544390201568604, "learning_rate": 6.554794520547945e-07, "log_odds_chosen": 1.6864089965820312, "log_odds_ratio": -0.2902551293373108, "logits/chosen": 0.913604736328125, "logits/rejected": 0.8891712427139282, "logps/chosen": -1.662651777267456, "logps/rejected": -3.196174144744873, "loss": 0.6709, "nll_loss": 0.6418282389640808, "rewards/accuracies": 1.0, "rewards/chosen": -0.16626517474651337, "rewards/margins": 0.1533522605895996, "rewards/rejected": -0.31961745023727417, "step": 2515 }, { "epoch": 6.888432580424367, "grad_norm": 3.127955675125122, "learning_rate": 6.553424657534247e-07, "log_odds_chosen": 1.3849804401397705, "log_odds_ratio": -0.5745059847831726, "logits/chosen": 0.9727723002433777, "logits/rejected": 0.9302013516426086, "logps/chosen": -1.7865586280822754, "logps/rejected": -3.1122100353240967, "loss": 0.7115, "nll_loss": 0.6540706157684326, "rewards/accuracies": 0.75, "rewards/chosen": -0.17865587770938873, "rewards/margins": 0.13256509602069855, "rewards/rejected": -0.3112209737300873, "step": 2516 }, { "epoch": 6.891170431211499, "grad_norm": 3.6294572353363037, "learning_rate": 6.552054794520548e-07, "log_odds_chosen": 1.8985917568206787, "log_odds_ratio": -0.5471518635749817, "logits/chosen": 1.0196573734283447, "logits/rejected": 1.0370603799819946, "logps/chosen": -2.1292903423309326, "logps/rejected": -3.8868746757507324, "loss": 0.7235, "nll_loss": 0.6687401533126831, "rewards/accuracies": 0.875, "rewards/chosen": -0.21292904019355774, "rewards/margins": 0.1757584512233734, "rewards/rejected": -0.38868749141693115, "step": 2517 }, { "epoch": 6.893908281998631, "grad_norm": 6.468163967132568, "learning_rate": 6.550684931506849e-07, "log_odds_chosen": 2.6630005836486816, "log_odds_ratio": -0.34097522497177124, "logits/chosen": 0.9485905170440674, "logits/rejected": 0.9102666974067688, "logps/chosen": -2.6672749519348145, "logps/rejected": -5.201905250549316, "loss": 0.7325, "nll_loss": 0.6984079480171204, "rewards/accuracies": 0.875, "rewards/chosen": -0.266727477312088, "rewards/margins": 0.25346308946609497, "rewards/rejected": -0.5201905965805054, "step": 2518 }, { "epoch": 6.896646132785763, "grad_norm": 4.241719722747803, "learning_rate": 6.549315068493151e-07, "log_odds_chosen": 1.825981616973877, "log_odds_ratio": -0.49470648169517517, "logits/chosen": 0.745202898979187, "logits/rejected": 0.6557623744010925, "logps/chosen": -2.855116605758667, "logps/rejected": -4.624505996704102, "loss": 0.868, "nll_loss": 0.8185379505157471, "rewards/accuracies": 0.75, "rewards/chosen": -0.28551167249679565, "rewards/margins": 0.1769389659166336, "rewards/rejected": -0.46245062351226807, "step": 2519 }, { "epoch": 6.899383983572895, "grad_norm": 3.2125296592712402, "learning_rate": 6.547945205479452e-07, "log_odds_chosen": 1.156615972518921, "log_odds_ratio": -0.4111572504043579, "logits/chosen": 0.5474008321762085, "logits/rejected": 0.5280425548553467, "logps/chosen": -1.7976908683776855, "logps/rejected": -2.7638137340545654, "loss": 0.7876, "nll_loss": 0.746443510055542, "rewards/accuracies": 0.875, "rewards/chosen": -0.17976908385753632, "rewards/margins": 0.09661227464675903, "rewards/rejected": -0.27638137340545654, "step": 2520 }, { "epoch": 6.9021218343600275, "grad_norm": 2.5147619247436523, "learning_rate": 6.546575342465753e-07, "log_odds_chosen": 4.788254261016846, "log_odds_ratio": -0.13125669956207275, "logits/chosen": 0.8704180717468262, "logits/rejected": 0.8597102165222168, "logps/chosen": -1.6626925468444824, "logps/rejected": -6.166000843048096, "loss": 0.6519, "nll_loss": 0.6387525796890259, "rewards/accuracies": 1.0, "rewards/chosen": -0.16626927256584167, "rewards/margins": 0.45033085346221924, "rewards/rejected": -0.6166000962257385, "step": 2521 }, { "epoch": 6.9048596851471595, "grad_norm": 2.8803513050079346, "learning_rate": 6.545205479452055e-07, "log_odds_chosen": 1.8843331336975098, "log_odds_ratio": -0.2528765797615051, "logits/chosen": 0.7812017202377319, "logits/rejected": 0.7682443857192993, "logps/chosen": -2.2126240730285645, "logps/rejected": -3.9575817584991455, "loss": 0.6316, "nll_loss": 0.6062757968902588, "rewards/accuracies": 0.875, "rewards/chosen": -0.22126242518424988, "rewards/margins": 0.17449575662612915, "rewards/rejected": -0.3957582116127014, "step": 2522 }, { "epoch": 6.907597535934292, "grad_norm": 2.799222707748413, "learning_rate": 6.543835616438356e-07, "log_odds_chosen": 2.7021234035491943, "log_odds_ratio": -0.1624656468629837, "logits/chosen": 0.8037574291229248, "logits/rejected": 0.8058199286460876, "logps/chosen": -2.070952892303467, "logps/rejected": -4.640965938568115, "loss": 0.7102, "nll_loss": 0.6939413547515869, "rewards/accuracies": 1.0, "rewards/chosen": -0.20709531009197235, "rewards/margins": 0.25700128078460693, "rewards/rejected": -0.4640966057777405, "step": 2523 }, { "epoch": 6.910335386721424, "grad_norm": 4.566585063934326, "learning_rate": 6.542465753424658e-07, "log_odds_chosen": 2.583333730697632, "log_odds_ratio": -0.2771145701408386, "logits/chosen": 1.0773890018463135, "logits/rejected": 1.0560803413391113, "logps/chosen": -1.981979250907898, "logps/rejected": -4.349451065063477, "loss": 0.6794, "nll_loss": 0.651735246181488, "rewards/accuracies": 0.875, "rewards/chosen": -0.19819793105125427, "rewards/margins": 0.23674722015857697, "rewards/rejected": -0.43494513630867004, "step": 2524 }, { "epoch": 6.913073237508556, "grad_norm": 4.7407612800598145, "learning_rate": 6.541095890410958e-07, "log_odds_chosen": 4.86863374710083, "log_odds_ratio": -0.14645317196846008, "logits/chosen": 1.0242013931274414, "logits/rejected": 1.0142070055007935, "logps/chosen": -1.869458794593811, "logps/rejected": -6.564375877380371, "loss": 0.7449, "nll_loss": 0.7302157878875732, "rewards/accuracies": 1.0, "rewards/chosen": -0.18694588541984558, "rewards/margins": 0.46949172019958496, "rewards/rejected": -0.6564376354217529, "step": 2525 }, { "epoch": 6.915811088295688, "grad_norm": 2.691843271255493, "learning_rate": 6.53972602739726e-07, "log_odds_chosen": 1.9308170080184937, "log_odds_ratio": -0.3918837904930115, "logits/chosen": 0.7926390767097473, "logits/rejected": 0.7450490593910217, "logps/chosen": -2.1287009716033936, "logps/rejected": -3.9632816314697266, "loss": 0.7568, "nll_loss": 0.7176523208618164, "rewards/accuracies": 0.75, "rewards/chosen": -0.21287009119987488, "rewards/margins": 0.18345806002616882, "rewards/rejected": -0.3963281512260437, "step": 2526 }, { "epoch": 6.91854893908282, "grad_norm": 3.0678775310516357, "learning_rate": 6.538356164383562e-07, "log_odds_chosen": 3.3624000549316406, "log_odds_ratio": -0.1375407725572586, "logits/chosen": 1.0540119409561157, "logits/rejected": 1.0592436790466309, "logps/chosen": -2.3499274253845215, "logps/rejected": -5.572963714599609, "loss": 0.6822, "nll_loss": 0.668464183807373, "rewards/accuracies": 1.0, "rewards/chosen": -0.23499272763729095, "rewards/margins": 0.3223036527633667, "rewards/rejected": -0.5572963953018188, "step": 2527 }, { "epoch": 6.921286789869952, "grad_norm": 3.235203742980957, "learning_rate": 6.536986301369862e-07, "log_odds_chosen": 2.517265558242798, "log_odds_ratio": -0.3037804663181305, "logits/chosen": 0.8826184868812561, "logits/rejected": 0.8830685615539551, "logps/chosen": -2.129720449447632, "logps/rejected": -4.484343528747559, "loss": 0.7141, "nll_loss": 0.6837277412414551, "rewards/accuracies": 0.875, "rewards/chosen": -0.212972030043602, "rewards/margins": 0.23546233773231506, "rewards/rejected": -0.44843435287475586, "step": 2528 }, { "epoch": 6.924024640657084, "grad_norm": 2.8095600605010986, "learning_rate": 6.535616438356164e-07, "log_odds_chosen": 1.8447903394699097, "log_odds_ratio": -0.28740042448043823, "logits/chosen": 0.8441062569618225, "logits/rejected": 0.8332810997962952, "logps/chosen": -1.503453016281128, "logps/rejected": -3.1253178119659424, "loss": 0.6567, "nll_loss": 0.6280043125152588, "rewards/accuracies": 0.875, "rewards/chosen": -0.1503453105688095, "rewards/margins": 0.16218645870685577, "rewards/rejected": -0.3125317692756653, "step": 2529 }, { "epoch": 6.926762491444216, "grad_norm": 2.8099076747894287, "learning_rate": 6.534246575342466e-07, "log_odds_chosen": 3.034196138381958, "log_odds_ratio": -0.1420125663280487, "logits/chosen": 0.6567972898483276, "logits/rejected": 0.5872105360031128, "logps/chosen": -1.7712054252624512, "logps/rejected": -4.618156433105469, "loss": 0.6659, "nll_loss": 0.6517449021339417, "rewards/accuracies": 1.0, "rewards/chosen": -0.17712055146694183, "rewards/margins": 0.2846951186656952, "rewards/rejected": -0.46181565523147583, "step": 2530 }, { "epoch": 6.929500342231348, "grad_norm": 3.357677698135376, "learning_rate": 6.532876712328767e-07, "log_odds_chosen": 1.6066868305206299, "log_odds_ratio": -0.3003620505332947, "logits/chosen": 0.7162830829620361, "logits/rejected": 0.7092821598052979, "logps/chosen": -2.1011836528778076, "logps/rejected": -3.563579559326172, "loss": 0.7623, "nll_loss": 0.7322983741760254, "rewards/accuracies": 0.875, "rewards/chosen": -0.2101183533668518, "rewards/margins": 0.14623957872390747, "rewards/rejected": -0.3563579320907593, "step": 2531 }, { "epoch": 6.93223819301848, "grad_norm": 2.724348306655884, "learning_rate": 6.531506849315068e-07, "log_odds_chosen": 1.4882371425628662, "log_odds_ratio": -0.5226987600326538, "logits/chosen": 1.0483620166778564, "logits/rejected": 0.9658076763153076, "logps/chosen": -2.3046164512634277, "logps/rejected": -3.6884756088256836, "loss": 0.7484, "nll_loss": 0.6961143016815186, "rewards/accuracies": 0.875, "rewards/chosen": -0.23046162724494934, "rewards/margins": 0.13838592171669006, "rewards/rejected": -0.3688475489616394, "step": 2532 }, { "epoch": 6.934976043805612, "grad_norm": 4.02042818069458, "learning_rate": 6.53013698630137e-07, "log_odds_chosen": 1.8706111907958984, "log_odds_ratio": -0.43698054552078247, "logits/chosen": 0.8952321410179138, "logits/rejected": 0.9288496375083923, "logps/chosen": -2.5281550884246826, "logps/rejected": -4.2465972900390625, "loss": 0.7771, "nll_loss": 0.7334203720092773, "rewards/accuracies": 0.75, "rewards/chosen": -0.25281551480293274, "rewards/margins": 0.1718442440032959, "rewards/rejected": -0.42465975880622864, "step": 2533 }, { "epoch": 6.937713894592744, "grad_norm": 2.5576012134552, "learning_rate": 6.528767123287671e-07, "log_odds_chosen": 1.6500505208969116, "log_odds_ratio": -0.2533244788646698, "logits/chosen": 0.8496667146682739, "logits/rejected": 0.7933405637741089, "logps/chosen": -1.9668185710906982, "logps/rejected": -3.501716136932373, "loss": 0.6667, "nll_loss": 0.6413198709487915, "rewards/accuracies": 1.0, "rewards/chosen": -0.19668185710906982, "rewards/margins": 0.15348976850509644, "rewards/rejected": -0.35017162561416626, "step": 2534 }, { "epoch": 6.940451745379876, "grad_norm": 4.222360610961914, "learning_rate": 6.527397260273972e-07, "log_odds_chosen": 1.8703564405441284, "log_odds_ratio": -0.31632596254348755, "logits/chosen": 0.9529988169670105, "logits/rejected": 0.973544180393219, "logps/chosen": -2.4827497005462646, "logps/rejected": -4.274030685424805, "loss": 0.6802, "nll_loss": 0.6486003398895264, "rewards/accuracies": 1.0, "rewards/chosen": -0.24827498197555542, "rewards/margins": 0.17912808060646057, "rewards/rejected": -0.427403062582016, "step": 2535 }, { "epoch": 6.943189596167009, "grad_norm": 2.8678627014160156, "learning_rate": 6.526027397260274e-07, "log_odds_chosen": 2.5821940898895264, "log_odds_ratio": -0.15866783261299133, "logits/chosen": 0.9263084530830383, "logits/rejected": 0.8960237503051758, "logps/chosen": -1.5849677324295044, "logps/rejected": -3.8601555824279785, "loss": 0.707, "nll_loss": 0.6911263465881348, "rewards/accuracies": 1.0, "rewards/chosen": -0.15849676728248596, "rewards/margins": 0.22751881182193756, "rewards/rejected": -0.3860155940055847, "step": 2536 }, { "epoch": 6.9459274469541405, "grad_norm": 4.284867286682129, "learning_rate": 6.524657534246575e-07, "log_odds_chosen": 3.3065285682678223, "log_odds_ratio": -0.2797536551952362, "logits/chosen": 0.7027056217193604, "logits/rejected": 0.7069026827812195, "logps/chosen": -2.414536714553833, "logps/rejected": -5.577394008636475, "loss": 0.7328, "nll_loss": 0.7048217058181763, "rewards/accuracies": 0.875, "rewards/chosen": -0.24145369231700897, "rewards/margins": 0.31628572940826416, "rewards/rejected": -0.5577394366264343, "step": 2537 }, { "epoch": 6.9486652977412735, "grad_norm": 3.0147125720977783, "learning_rate": 6.523287671232877e-07, "log_odds_chosen": 1.7690547704696655, "log_odds_ratio": -0.2459091693162918, "logits/chosen": 1.1462182998657227, "logits/rejected": 1.14311945438385, "logps/chosen": -2.0528371334075928, "logps/rejected": -3.683037757873535, "loss": 0.6417, "nll_loss": 0.6171443462371826, "rewards/accuracies": 1.0, "rewards/chosen": -0.2052837312221527, "rewards/margins": 0.1630200445652008, "rewards/rejected": -0.36830374598503113, "step": 2538 }, { "epoch": 6.951403148528406, "grad_norm": 3.2379250526428223, "learning_rate": 6.521917808219177e-07, "log_odds_chosen": 3.0067882537841797, "log_odds_ratio": -0.2401847243309021, "logits/chosen": 0.6078783273696899, "logits/rejected": 0.630614161491394, "logps/chosen": -1.9839184284210205, "logps/rejected": -4.850700855255127, "loss": 0.7629, "nll_loss": 0.7388452291488647, "rewards/accuracies": 1.0, "rewards/chosen": -0.1983918398618698, "rewards/margins": 0.2866782546043396, "rewards/rejected": -0.4850700795650482, "step": 2539 }, { "epoch": 6.954140999315538, "grad_norm": 2.7343685626983643, "learning_rate": 6.520547945205479e-07, "log_odds_chosen": 2.059549570083618, "log_odds_ratio": -0.3419450521469116, "logits/chosen": 0.9341205358505249, "logits/rejected": 0.9218257665634155, "logps/chosen": -1.840061902999878, "logps/rejected": -3.7953901290893555, "loss": 0.7173, "nll_loss": 0.683137834072113, "rewards/accuracies": 0.875, "rewards/chosen": -0.1840062141418457, "rewards/margins": 0.19553282856941223, "rewards/rejected": -0.37953901290893555, "step": 2540 }, { "epoch": 6.95687885010267, "grad_norm": 2.736895799636841, "learning_rate": 6.519178082191781e-07, "log_odds_chosen": 1.9201658964157104, "log_odds_ratio": -0.2600809335708618, "logits/chosen": 0.6738616824150085, "logits/rejected": 0.5816963911056519, "logps/chosen": -1.4015604257583618, "logps/rejected": -3.09072208404541, "loss": 0.7148, "nll_loss": 0.6888389587402344, "rewards/accuracies": 1.0, "rewards/chosen": -0.14015606045722961, "rewards/margins": 0.16891616582870483, "rewards/rejected": -0.30907219648361206, "step": 2541 }, { "epoch": 6.959616700889802, "grad_norm": 4.415574550628662, "learning_rate": 6.517808219178081e-07, "log_odds_chosen": 1.5311580896377563, "log_odds_ratio": -0.5453693866729736, "logits/chosen": 0.8964723348617554, "logits/rejected": 0.9155615568161011, "logps/chosen": -2.8540918827056885, "logps/rejected": -4.317387580871582, "loss": 0.819, "nll_loss": 0.7644678950309753, "rewards/accuracies": 0.875, "rewards/chosen": -0.28540918231010437, "rewards/margins": 0.14632956683635712, "rewards/rejected": -0.4317387342453003, "step": 2542 }, { "epoch": 6.962354551676934, "grad_norm": 2.836932420730591, "learning_rate": 6.516438356164383e-07, "log_odds_chosen": 1.716498851776123, "log_odds_ratio": -0.2835979461669922, "logits/chosen": 0.9376804828643799, "logits/rejected": 0.9142671823501587, "logps/chosen": -1.9961645603179932, "logps/rejected": -3.5904977321624756, "loss": 0.681, "nll_loss": 0.6526404023170471, "rewards/accuracies": 1.0, "rewards/chosen": -0.1996164619922638, "rewards/margins": 0.15943332016468048, "rewards/rejected": -0.35904979705810547, "step": 2543 }, { "epoch": 6.965092402464066, "grad_norm": 3.0483808517456055, "learning_rate": 6.515068493150685e-07, "log_odds_chosen": 1.8971807956695557, "log_odds_ratio": -0.23876842856407166, "logits/chosen": 0.8340731859207153, "logits/rejected": 0.7922160625457764, "logps/chosen": -1.5704560279846191, "logps/rejected": -3.141057014465332, "loss": 0.5977, "nll_loss": 0.5738042593002319, "rewards/accuracies": 0.875, "rewards/chosen": -0.15704560279846191, "rewards/margins": 0.15706013143062592, "rewards/rejected": -0.314105749130249, "step": 2544 }, { "epoch": 6.967830253251198, "grad_norm": 6.120509147644043, "learning_rate": 6.513698630136986e-07, "log_odds_chosen": 1.2616820335388184, "log_odds_ratio": -0.33174532651901245, "logits/chosen": 1.0583363771438599, "logits/rejected": 1.0027167797088623, "logps/chosen": -2.2884364128112793, "logps/rejected": -3.419285297393799, "loss": 0.7313, "nll_loss": 0.6981610655784607, "rewards/accuracies": 0.875, "rewards/chosen": -0.22884365916252136, "rewards/margins": 0.11308488249778748, "rewards/rejected": -0.34192851185798645, "step": 2545 }, { "epoch": 6.97056810403833, "grad_norm": 2.730499744415283, "learning_rate": 6.512328767123287e-07, "log_odds_chosen": 1.5322091579437256, "log_odds_ratio": -0.31067413091659546, "logits/chosen": 0.772268533706665, "logits/rejected": 0.784044623374939, "logps/chosen": -2.125183582305908, "logps/rejected": -3.553096294403076, "loss": 0.6651, "nll_loss": 0.6340317726135254, "rewards/accuracies": 0.875, "rewards/chosen": -0.2125183790922165, "rewards/margins": 0.1427912712097168, "rewards/rejected": -0.3553096354007721, "step": 2546 }, { "epoch": 6.973305954825462, "grad_norm": 2.478066921234131, "learning_rate": 6.510958904109589e-07, "log_odds_chosen": 3.592848777770996, "log_odds_ratio": -0.17838992178440094, "logits/chosen": 0.5524237155914307, "logits/rejected": 0.489847332239151, "logps/chosen": -1.6286729574203491, "logps/rejected": -5.037732124328613, "loss": 0.7266, "nll_loss": 0.7088016271591187, "rewards/accuracies": 1.0, "rewards/chosen": -0.16286727786064148, "rewards/margins": 0.34090590476989746, "rewards/rejected": -0.5037732124328613, "step": 2547 }, { "epoch": 6.976043805612594, "grad_norm": 2.7185704708099365, "learning_rate": 6.50958904109589e-07, "log_odds_chosen": 1.6288729906082153, "log_odds_ratio": -0.2657512426376343, "logits/chosen": 0.8167972564697266, "logits/rejected": 0.7662776112556458, "logps/chosen": -1.7127031087875366, "logps/rejected": -3.162799835205078, "loss": 0.7391, "nll_loss": 0.7125506401062012, "rewards/accuracies": 0.875, "rewards/chosen": -0.17127031087875366, "rewards/margins": 0.14500968158245087, "rewards/rejected": -0.3162800073623657, "step": 2548 }, { "epoch": 6.978781656399726, "grad_norm": 3.4427785873413086, "learning_rate": 6.508219178082191e-07, "log_odds_chosen": 2.3957345485687256, "log_odds_ratio": -0.1238689124584198, "logits/chosen": 0.8949561715126038, "logits/rejected": 0.8687856793403625, "logps/chosen": -1.8037967681884766, "logps/rejected": -4.012126922607422, "loss": 0.6187, "nll_loss": 0.6063077449798584, "rewards/accuracies": 1.0, "rewards/chosen": -0.18037967383861542, "rewards/margins": 0.22083303332328796, "rewards/rejected": -0.4012126922607422, "step": 2549 }, { "epoch": 6.981519507186858, "grad_norm": 3.3189504146575928, "learning_rate": 6.506849315068493e-07, "log_odds_chosen": 1.6215676069259644, "log_odds_ratio": -0.24541443586349487, "logits/chosen": 0.6459070444107056, "logits/rejected": 0.5972601175308228, "logps/chosen": -1.4821879863739014, "logps/rejected": -2.8739774227142334, "loss": 0.6687, "nll_loss": 0.644112229347229, "rewards/accuracies": 0.875, "rewards/chosen": -0.1482188105583191, "rewards/margins": 0.13917894661426544, "rewards/rejected": -0.2873977720737457, "step": 2550 }, { "epoch": 6.98425735797399, "grad_norm": 3.7488441467285156, "learning_rate": 6.505479452054794e-07, "log_odds_chosen": 1.7537918090820312, "log_odds_ratio": -0.39009928703308105, "logits/chosen": 0.7573105692863464, "logits/rejected": 0.7899307608604431, "logps/chosen": -2.5319936275482178, "logps/rejected": -4.213412284851074, "loss": 0.6974, "nll_loss": 0.6583985686302185, "rewards/accuracies": 0.875, "rewards/chosen": -0.25319936871528625, "rewards/margins": 0.16814188659191132, "rewards/rejected": -0.4213412404060364, "step": 2551 }, { "epoch": 6.9869952087611225, "grad_norm": 2.613701343536377, "learning_rate": 6.504109589041096e-07, "log_odds_chosen": 1.9794248342514038, "log_odds_ratio": -0.2407844066619873, "logits/chosen": 0.7417668104171753, "logits/rejected": 0.721508264541626, "logps/chosen": -1.5748529434204102, "logps/rejected": -3.3884642124176025, "loss": 0.6936, "nll_loss": 0.6694936752319336, "rewards/accuracies": 1.0, "rewards/chosen": -0.15748527646064758, "rewards/margins": 0.1813611388206482, "rewards/rejected": -0.33884644508361816, "step": 2552 }, { "epoch": 6.9897330595482545, "grad_norm": 4.182573318481445, "learning_rate": 6.502739726027397e-07, "log_odds_chosen": 1.2427623271942139, "log_odds_ratio": -0.3090147376060486, "logits/chosen": 0.755532443523407, "logits/rejected": 0.7439817190170288, "logps/chosen": -1.809605360031128, "logps/rejected": -2.8868625164031982, "loss": 0.6924, "nll_loss": 0.6614739298820496, "rewards/accuracies": 0.875, "rewards/chosen": -0.1809605360031128, "rewards/margins": 0.10772567987442017, "rewards/rejected": -0.28868621587753296, "step": 2553 }, { "epoch": 6.992470910335387, "grad_norm": 3.124962568283081, "learning_rate": 6.501369863013698e-07, "log_odds_chosen": 1.8486956357955933, "log_odds_ratio": -0.45907729864120483, "logits/chosen": 0.7562817335128784, "logits/rejected": 0.8058999180793762, "logps/chosen": -2.284848690032959, "logps/rejected": -4.010239124298096, "loss": 0.8399, "nll_loss": 0.7940099239349365, "rewards/accuracies": 0.875, "rewards/chosen": -0.2284848690032959, "rewards/margins": 0.17253902554512024, "rewards/rejected": -0.4010239243507385, "step": 2554 }, { "epoch": 6.995208761122519, "grad_norm": 3.176723003387451, "learning_rate": 6.5e-07, "log_odds_chosen": 3.0239059925079346, "log_odds_ratio": -0.3834070861339569, "logits/chosen": 0.8422364592552185, "logits/rejected": 0.8302006721496582, "logps/chosen": -1.7097771167755127, "logps/rejected": -4.624359607696533, "loss": 0.7095, "nll_loss": 0.6711854934692383, "rewards/accuracies": 0.75, "rewards/chosen": -0.17097771167755127, "rewards/margins": 0.29145824909210205, "rewards/rejected": -0.4624359607696533, "step": 2555 }, { "epoch": 6.997946611909651, "grad_norm": 6.2362751960754395, "learning_rate": 6.4986301369863e-07, "log_odds_chosen": 0.9619276523590088, "log_odds_ratio": -0.6116777658462524, "logits/chosen": 0.9655709266662598, "logits/rejected": 0.9686816930770874, "logps/chosen": -2.8376049995422363, "logps/rejected": -3.6747798919677734, "loss": 0.7369, "nll_loss": 0.6757014989852905, "rewards/accuracies": 0.75, "rewards/chosen": -0.2837604880332947, "rewards/margins": 0.08371749520301819, "rewards/rejected": -0.36747798323631287, "step": 2556 }, { "epoch": 7.000684462696783, "grad_norm": 3.0578291416168213, "learning_rate": 6.497260273972602e-07, "log_odds_chosen": 0.8892765045166016, "log_odds_ratio": -0.560807466506958, "logits/chosen": 0.8642832636833191, "logits/rejected": 0.8121272921562195, "logps/chosen": -1.7450945377349854, "logps/rejected": -2.478435516357422, "loss": 0.7622, "nll_loss": 0.7060712575912476, "rewards/accuracies": 0.75, "rewards/chosen": -0.1745094656944275, "rewards/margins": 0.07333409786224365, "rewards/rejected": -0.24784354865550995, "step": 2557 }, { "epoch": 7.003422313483915, "grad_norm": 3.0679404735565186, "learning_rate": 6.495890410958904e-07, "log_odds_chosen": 2.9514050483703613, "log_odds_ratio": -0.07550941407680511, "logits/chosen": 0.9414123296737671, "logits/rejected": 0.9816670417785645, "logps/chosen": -1.6943106651306152, "logps/rejected": -4.424407958984375, "loss": 0.6647, "nll_loss": 0.6571987271308899, "rewards/accuracies": 1.0, "rewards/chosen": -0.16943109035491943, "rewards/margins": 0.273009717464447, "rewards/rejected": -0.44244077801704407, "step": 2558 }, { "epoch": 7.006160164271047, "grad_norm": 2.7852065563201904, "learning_rate": 6.494520547945205e-07, "log_odds_chosen": 2.447812557220459, "log_odds_ratio": -0.22998477518558502, "logits/chosen": 0.712117075920105, "logits/rejected": 0.7308847308158875, "logps/chosen": -1.7473814487457275, "logps/rejected": -3.9229917526245117, "loss": 0.7651, "nll_loss": 0.7421011328697205, "rewards/accuracies": 1.0, "rewards/chosen": -0.17473815381526947, "rewards/margins": 0.2175610363483429, "rewards/rejected": -0.39229917526245117, "step": 2559 }, { "epoch": 7.008898015058179, "grad_norm": 3.6924338340759277, "learning_rate": 6.493150684931506e-07, "log_odds_chosen": 1.9732388257980347, "log_odds_ratio": -0.2357034534215927, "logits/chosen": 0.9478216171264648, "logits/rejected": 0.9756073951721191, "logps/chosen": -1.899095058441162, "logps/rejected": -3.7285032272338867, "loss": 0.6166, "nll_loss": 0.5930181741714478, "rewards/accuracies": 0.875, "rewards/chosen": -0.18990951776504517, "rewards/margins": 0.18294084072113037, "rewards/rejected": -0.37285032868385315, "step": 2560 }, { "epoch": 7.011635865845311, "grad_norm": 2.7583632469177246, "learning_rate": 6.491780821917808e-07, "log_odds_chosen": 2.9166009426116943, "log_odds_ratio": -0.207136869430542, "logits/chosen": 0.7967103719711304, "logits/rejected": 0.8222880959510803, "logps/chosen": -2.1040380001068115, "logps/rejected": -4.892130374908447, "loss": 0.7358, "nll_loss": 0.7151249051094055, "rewards/accuracies": 1.0, "rewards/chosen": -0.21040380001068115, "rewards/margins": 0.27880921959877014, "rewards/rejected": -0.4892130494117737, "step": 2561 }, { "epoch": 7.014373716632443, "grad_norm": 3.257871627807617, "learning_rate": 6.490410958904109e-07, "log_odds_chosen": 2.9381906986236572, "log_odds_ratio": -0.20912298560142517, "logits/chosen": 0.9036493301391602, "logits/rejected": 0.8874311447143555, "logps/chosen": -1.8640382289886475, "logps/rejected": -4.635977745056152, "loss": 0.6565, "nll_loss": 0.6355811357498169, "rewards/accuracies": 0.875, "rewards/chosen": -0.1864038109779358, "rewards/margins": 0.27719396352767944, "rewards/rejected": -0.46359777450561523, "step": 2562 }, { "epoch": 7.017111567419575, "grad_norm": 2.404315710067749, "learning_rate": 6.48904109589041e-07, "log_odds_chosen": 2.394587755203247, "log_odds_ratio": -0.15828239917755127, "logits/chosen": 0.7462874054908752, "logits/rejected": 0.7198741436004639, "logps/chosen": -1.3672295808792114, "logps/rejected": -3.5089008808135986, "loss": 0.6251, "nll_loss": 0.6092914938926697, "rewards/accuracies": 1.0, "rewards/chosen": -0.13672296702861786, "rewards/margins": 0.21416714787483215, "rewards/rejected": -0.3508901298046112, "step": 2563 }, { "epoch": 7.019849418206708, "grad_norm": 3.942134380340576, "learning_rate": 6.487671232876712e-07, "log_odds_chosen": 1.5884381532669067, "log_odds_ratio": -0.34511232376098633, "logits/chosen": 1.0571322441101074, "logits/rejected": 1.0701338052749634, "logps/chosen": -2.2501587867736816, "logps/rejected": -3.740131378173828, "loss": 0.7174, "nll_loss": 0.6829084753990173, "rewards/accuracies": 0.75, "rewards/chosen": -0.22501586377620697, "rewards/margins": 0.14899727702140808, "rewards/rejected": -0.37401315569877625, "step": 2564 }, { "epoch": 7.02258726899384, "grad_norm": 3.7864115238189697, "learning_rate": 6.486301369863013e-07, "log_odds_chosen": 0.6800689697265625, "log_odds_ratio": -0.6394522786140442, "logits/chosen": 0.61601322889328, "logits/rejected": 0.5866420865058899, "logps/chosen": -2.5881800651550293, "logps/rejected": -3.2711246013641357, "loss": 0.7858, "nll_loss": 0.7218617796897888, "rewards/accuracies": 0.5, "rewards/chosen": -0.25881800055503845, "rewards/margins": 0.0682944655418396, "rewards/rejected": -0.32711246609687805, "step": 2565 }, { "epoch": 7.025325119780972, "grad_norm": 4.911003589630127, "learning_rate": 6.484931506849315e-07, "log_odds_chosen": 2.6787912845611572, "log_odds_ratio": -0.36864742636680603, "logits/chosen": 1.1366503238677979, "logits/rejected": 1.1118736267089844, "logps/chosen": -2.5736875534057617, "logps/rejected": -5.118130683898926, "loss": 0.7359, "nll_loss": 0.6990593671798706, "rewards/accuracies": 0.875, "rewards/chosen": -0.2573687434196472, "rewards/margins": 0.25444433093070984, "rewards/rejected": -0.5118131041526794, "step": 2566 }, { "epoch": 7.028062970568104, "grad_norm": 3.9131205081939697, "learning_rate": 6.483561643835616e-07, "log_odds_chosen": 3.1909995079040527, "log_odds_ratio": -0.35279250144958496, "logits/chosen": 1.0137293338775635, "logits/rejected": 1.0079221725463867, "logps/chosen": -2.2874841690063477, "logps/rejected": -5.373307704925537, "loss": 0.7571, "nll_loss": 0.7218384146690369, "rewards/accuracies": 0.875, "rewards/chosen": -0.22874844074249268, "rewards/margins": 0.3085823357105255, "rewards/rejected": -0.5373308062553406, "step": 2567 }, { "epoch": 7.030800821355236, "grad_norm": 3.8291988372802734, "learning_rate": 6.482191780821917e-07, "log_odds_chosen": 3.2193241119384766, "log_odds_ratio": -0.21784107387065887, "logits/chosen": 0.8446036577224731, "logits/rejected": 0.8573979139328003, "logps/chosen": -2.2809157371520996, "logps/rejected": -5.32720947265625, "loss": 0.7337, "nll_loss": 0.7118738889694214, "rewards/accuracies": 0.875, "rewards/chosen": -0.22809159755706787, "rewards/margins": 0.304629385471344, "rewards/rejected": -0.5327209234237671, "step": 2568 }, { "epoch": 7.0335386721423685, "grad_norm": 2.8708529472351074, "learning_rate": 6.480821917808219e-07, "log_odds_chosen": 2.4751758575439453, "log_odds_ratio": -0.19207234680652618, "logits/chosen": 1.0290415287017822, "logits/rejected": 1.0573762655258179, "logps/chosen": -1.8492345809936523, "logps/rejected": -4.060070037841797, "loss": 0.6536, "nll_loss": 0.6343622207641602, "rewards/accuracies": 1.0, "rewards/chosen": -0.1849234700202942, "rewards/margins": 0.22108355164527893, "rewards/rejected": -0.4060070216655731, "step": 2569 }, { "epoch": 7.036276522929501, "grad_norm": 2.6525070667266846, "learning_rate": 6.479452054794519e-07, "log_odds_chosen": 2.3083701133728027, "log_odds_ratio": -0.26573997735977173, "logits/chosen": 0.572483241558075, "logits/rejected": 0.5237210988998413, "logps/chosen": -2.215287208557129, "logps/rejected": -4.418101787567139, "loss": 0.7418, "nll_loss": 0.7151933312416077, "rewards/accuracies": 0.875, "rewards/chosen": -0.22152872383594513, "rewards/margins": 0.2202814668416977, "rewards/rejected": -0.4418102204799652, "step": 2570 }, { "epoch": 7.039014373716633, "grad_norm": 2.5284159183502197, "learning_rate": 6.478082191780821e-07, "log_odds_chosen": 2.2116756439208984, "log_odds_ratio": -0.42349401116371155, "logits/chosen": 0.5207659602165222, "logits/rejected": 0.508804440498352, "logps/chosen": -2.1974356174468994, "logps/rejected": -4.320374011993408, "loss": 0.7064, "nll_loss": 0.664033055305481, "rewards/accuracies": 0.75, "rewards/chosen": -0.219743549823761, "rewards/margins": 0.2122938632965088, "rewards/rejected": -0.4320374131202698, "step": 2571 }, { "epoch": 7.041752224503765, "grad_norm": 3.918421983718872, "learning_rate": 6.476712328767123e-07, "log_odds_chosen": 1.5750946998596191, "log_odds_ratio": -0.27798110246658325, "logits/chosen": 0.8193017244338989, "logits/rejected": 0.7684826850891113, "logps/chosen": -1.916640043258667, "logps/rejected": -3.2363100051879883, "loss": 0.7208, "nll_loss": 0.69295734167099, "rewards/accuracies": 1.0, "rewards/chosen": -0.19166401028633118, "rewards/margins": 0.1319669634103775, "rewards/rejected": -0.3236309885978699, "step": 2572 }, { "epoch": 7.044490075290897, "grad_norm": 2.7966880798339844, "learning_rate": 6.475342465753424e-07, "log_odds_chosen": 1.950070858001709, "log_odds_ratio": -0.4696462154388428, "logits/chosen": 0.893158495426178, "logits/rejected": 0.8419412970542908, "logps/chosen": -2.242058277130127, "logps/rejected": -4.0382080078125, "loss": 0.8228, "nll_loss": 0.7757894992828369, "rewards/accuracies": 0.875, "rewards/chosen": -0.2242058366537094, "rewards/margins": 0.17961494624614716, "rewards/rejected": -0.40382078289985657, "step": 2573 }, { "epoch": 7.047227926078029, "grad_norm": 6.788820266723633, "learning_rate": 6.473972602739725e-07, "log_odds_chosen": 1.3885570764541626, "log_odds_ratio": -0.5430851578712463, "logits/chosen": 0.6119270324707031, "logits/rejected": 0.5888470411300659, "logps/chosen": -2.195172071456909, "logps/rejected": -3.4662013053894043, "loss": 0.7275, "nll_loss": 0.6731629371643066, "rewards/accuracies": 0.75, "rewards/chosen": -0.21951723098754883, "rewards/margins": 0.12710288166999817, "rewards/rejected": -0.346620112657547, "step": 2574 }, { "epoch": 7.049965776865161, "grad_norm": 3.092008113861084, "learning_rate": 6.472602739726027e-07, "log_odds_chosen": 2.4443976879119873, "log_odds_ratio": -0.315635085105896, "logits/chosen": 0.8805254697799683, "logits/rejected": 0.9388972520828247, "logps/chosen": -3.50582218170166, "logps/rejected": -5.8903656005859375, "loss": 0.7703, "nll_loss": 0.7386925220489502, "rewards/accuracies": 0.75, "rewards/chosen": -0.35058221220970154, "rewards/margins": 0.23845431208610535, "rewards/rejected": -0.5890365242958069, "step": 2575 }, { "epoch": 7.052703627652293, "grad_norm": 3.760509490966797, "learning_rate": 6.471232876712328e-07, "log_odds_chosen": 3.1090140342712402, "log_odds_ratio": -0.3416343927383423, "logits/chosen": 1.1458053588867188, "logits/rejected": 1.0996983051300049, "logps/chosen": -2.395261287689209, "logps/rejected": -5.396864891052246, "loss": 0.6751, "nll_loss": 0.6409493088722229, "rewards/accuracies": 0.875, "rewards/chosen": -0.2395261526107788, "rewards/margins": 0.30016040802001953, "rewards/rejected": -0.5396865606307983, "step": 2576 }, { "epoch": 7.055441478439425, "grad_norm": 2.6820621490478516, "learning_rate": 6.469863013698629e-07, "log_odds_chosen": 2.6737771034240723, "log_odds_ratio": -0.24467426538467407, "logits/chosen": 0.827919065952301, "logits/rejected": 0.7840472459793091, "logps/chosen": -1.3223509788513184, "logps/rejected": -3.773400068283081, "loss": 0.6422, "nll_loss": 0.6177164316177368, "rewards/accuracies": 1.0, "rewards/chosen": -0.1322351098060608, "rewards/margins": 0.24510490894317627, "rewards/rejected": -0.37734001874923706, "step": 2577 }, { "epoch": 7.058179329226557, "grad_norm": 2.7752010822296143, "learning_rate": 6.468493150684932e-07, "log_odds_chosen": 1.8726756572723389, "log_odds_ratio": -0.3396233320236206, "logits/chosen": 0.5842395424842834, "logits/rejected": 0.56706702709198, "logps/chosen": -2.2910122871398926, "logps/rejected": -3.998596668243408, "loss": 0.7522, "nll_loss": 0.7182025909423828, "rewards/accuracies": 0.75, "rewards/chosen": -0.22910122573375702, "rewards/margins": 0.1707584410905838, "rewards/rejected": -0.3998596668243408, "step": 2578 }, { "epoch": 7.060917180013689, "grad_norm": 3.4210543632507324, "learning_rate": 6.467123287671232e-07, "log_odds_chosen": 3.210843086242676, "log_odds_ratio": -0.15968169271945953, "logits/chosen": 0.8928946256637573, "logits/rejected": 0.9008287191390991, "logps/chosen": -1.7124240398406982, "logps/rejected": -4.749981880187988, "loss": 0.6209, "nll_loss": 0.6048908233642578, "rewards/accuracies": 1.0, "rewards/chosen": -0.17124240100383759, "rewards/margins": 0.3037557899951935, "rewards/rejected": -0.4749981760978699, "step": 2579 }, { "epoch": 7.063655030800821, "grad_norm": 2.9844844341278076, "learning_rate": 6.465753424657535e-07, "log_odds_chosen": 2.700941801071167, "log_odds_ratio": -0.22216837108135223, "logits/chosen": 0.5651682019233704, "logits/rejected": 0.5225560069084167, "logps/chosen": -2.1144485473632812, "logps/rejected": -4.640986442565918, "loss": 0.6291, "nll_loss": 0.6068494319915771, "rewards/accuracies": 0.875, "rewards/chosen": -0.21144485473632812, "rewards/margins": 0.2526538074016571, "rewards/rejected": -0.4640986919403076, "step": 2580 }, { "epoch": 7.066392881587953, "grad_norm": 3.1298534870147705, "learning_rate": 6.464383561643836e-07, "log_odds_chosen": 2.7373642921447754, "log_odds_ratio": -0.22778716683387756, "logits/chosen": 0.8333098292350769, "logits/rejected": 0.8188613653182983, "logps/chosen": -1.4904589653015137, "logps/rejected": -4.045243263244629, "loss": 0.6576, "nll_loss": 0.6348023414611816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490459144115448, "rewards/margins": 0.2554784417152405, "rewards/rejected": -0.4045243561267853, "step": 2581 }, { "epoch": 7.069130732375085, "grad_norm": 3.268822193145752, "learning_rate": 6.463013698630137e-07, "log_odds_chosen": 1.959486961364746, "log_odds_ratio": -0.24102070927619934, "logits/chosen": 1.002177357673645, "logits/rejected": 0.985418438911438, "logps/chosen": -2.0260276794433594, "logps/rejected": -3.7823634147644043, "loss": 0.65, "nll_loss": 0.6259331703186035, "rewards/accuracies": 1.0, "rewards/chosen": -0.20260277390480042, "rewards/margins": 0.17563357949256897, "rewards/rejected": -0.3782363533973694, "step": 2582 }, { "epoch": 7.0718685831622174, "grad_norm": 2.631765842437744, "learning_rate": 6.461643835616439e-07, "log_odds_chosen": 3.428635597229004, "log_odds_ratio": -0.17176735401153564, "logits/chosen": 0.7410398721694946, "logits/rejected": 0.6845018267631531, "logps/chosen": -1.9532179832458496, "logps/rejected": -5.229506015777588, "loss": 0.7006, "nll_loss": 0.6834145784378052, "rewards/accuracies": 1.0, "rewards/chosen": -0.19532179832458496, "rewards/margins": 0.32762882113456726, "rewards/rejected": -0.5229506492614746, "step": 2583 }, { "epoch": 7.0746064339493495, "grad_norm": 3.007500648498535, "learning_rate": 6.46027397260274e-07, "log_odds_chosen": 3.01682710647583, "log_odds_ratio": -0.3409540057182312, "logits/chosen": 0.8023368120193481, "logits/rejected": 0.825206458568573, "logps/chosen": -2.367233991622925, "logps/rejected": -5.302774429321289, "loss": 0.8072, "nll_loss": 0.7730560302734375, "rewards/accuracies": 0.75, "rewards/chosen": -0.2367233783006668, "rewards/margins": 0.29355403780937195, "rewards/rejected": -0.5302774310112, "step": 2584 }, { "epoch": 7.077344284736482, "grad_norm": 2.900291681289673, "learning_rate": 6.458904109589041e-07, "log_odds_chosen": 2.5671277046203613, "log_odds_ratio": -0.19586218893527985, "logits/chosen": 0.802543580532074, "logits/rejected": 0.7742672562599182, "logps/chosen": -1.70853853225708, "logps/rejected": -4.048953056335449, "loss": 0.7178, "nll_loss": 0.6982570886611938, "rewards/accuracies": 1.0, "rewards/chosen": -0.170853853225708, "rewards/margins": 0.23404142260551453, "rewards/rejected": -0.40489527583122253, "step": 2585 }, { "epoch": 7.080082135523614, "grad_norm": 3.258821964263916, "learning_rate": 6.457534246575343e-07, "log_odds_chosen": 3.8038368225097656, "log_odds_ratio": -0.18095342814922333, "logits/chosen": 0.7771018743515015, "logits/rejected": 0.7840940356254578, "logps/chosen": -1.7758663892745972, "logps/rejected": -5.366130352020264, "loss": 0.7669, "nll_loss": 0.7487627267837524, "rewards/accuracies": 0.875, "rewards/chosen": -0.177586629986763, "rewards/margins": 0.3590264320373535, "rewards/rejected": -0.5366130471229553, "step": 2586 }, { "epoch": 7.082819986310746, "grad_norm": 2.9415197372436523, "learning_rate": 6.456164383561643e-07, "log_odds_chosen": 0.7371580004692078, "log_odds_ratio": -0.4460577666759491, "logits/chosen": 0.6070093512535095, "logits/rejected": 0.5496604442596436, "logps/chosen": -1.7115076780319214, "logps/rejected": -2.328007698059082, "loss": 0.713, "nll_loss": 0.6683609485626221, "rewards/accuracies": 0.75, "rewards/chosen": -0.17115077376365662, "rewards/margins": 0.061649974435567856, "rewards/rejected": -0.23280075192451477, "step": 2587 }, { "epoch": 7.085557837097878, "grad_norm": 3.5501084327697754, "learning_rate": 6.454794520547945e-07, "log_odds_chosen": 1.6210782527923584, "log_odds_ratio": -0.572381317615509, "logits/chosen": 0.7293860912322998, "logits/rejected": 0.7709770798683167, "logps/chosen": -2.1723544597625732, "logps/rejected": -3.660818338394165, "loss": 0.6865, "nll_loss": 0.629280686378479, "rewards/accuracies": 0.875, "rewards/chosen": -0.21723543107509613, "rewards/margins": 0.14884638786315918, "rewards/rejected": -0.3660818040370941, "step": 2588 }, { "epoch": 7.08829568788501, "grad_norm": 2.6579294204711914, "learning_rate": 6.453424657534247e-07, "log_odds_chosen": 3.4546234607696533, "log_odds_ratio": -0.0850975513458252, "logits/chosen": 0.6994146704673767, "logits/rejected": 0.6582366824150085, "logps/chosen": -1.5216107368469238, "logps/rejected": -4.72415828704834, "loss": 0.6675, "nll_loss": 0.6589605212211609, "rewards/accuracies": 1.0, "rewards/chosen": -0.15216107666492462, "rewards/margins": 0.32025477290153503, "rewards/rejected": -0.47241583466529846, "step": 2589 }, { "epoch": 7.091033538672143, "grad_norm": 2.7600622177124023, "learning_rate": 6.452054794520548e-07, "log_odds_chosen": 2.3202664852142334, "log_odds_ratio": -0.2205459326505661, "logits/chosen": 0.9820610284805298, "logits/rejected": 0.9415736794471741, "logps/chosen": -1.5262205600738525, "logps/rejected": -3.6547703742980957, "loss": 0.6704, "nll_loss": 0.6483867168426514, "rewards/accuracies": 1.0, "rewards/chosen": -0.1526220589876175, "rewards/margins": 0.21285495162010193, "rewards/rejected": -0.3654770255088806, "step": 2590 }, { "epoch": 7.093771389459275, "grad_norm": 4.21327018737793, "learning_rate": 6.450684931506849e-07, "log_odds_chosen": 2.064663887023926, "log_odds_ratio": -0.5697973966598511, "logits/chosen": 0.7555615305900574, "logits/rejected": 0.7212461233139038, "logps/chosen": -2.2474870681762695, "logps/rejected": -4.19959831237793, "loss": 0.7498, "nll_loss": 0.6927957534790039, "rewards/accuracies": 0.625, "rewards/chosen": -0.22474871575832367, "rewards/margins": 0.19521109759807587, "rewards/rejected": -0.41995981335639954, "step": 2591 }, { "epoch": 7.096509240246407, "grad_norm": 3.465566635131836, "learning_rate": 6.449315068493151e-07, "log_odds_chosen": 3.379318952560425, "log_odds_ratio": -0.26214298605918884, "logits/chosen": 0.9820556640625, "logits/rejected": 1.0400667190551758, "logps/chosen": -3.392284631729126, "logps/rejected": -6.726290702819824, "loss": 0.8987, "nll_loss": 0.8725205659866333, "rewards/accuracies": 0.875, "rewards/chosen": -0.33922845125198364, "rewards/margins": 0.3334006369113922, "rewards/rejected": -0.6726291179656982, "step": 2592 }, { "epoch": 7.099247091033539, "grad_norm": 2.770624876022339, "learning_rate": 6.447945205479452e-07, "log_odds_chosen": 2.677964925765991, "log_odds_ratio": -0.28391462564468384, "logits/chosen": 0.8120695352554321, "logits/rejected": 0.808296263217926, "logps/chosen": -2.111128330230713, "logps/rejected": -4.6717095375061035, "loss": 0.7225, "nll_loss": 0.6940661668777466, "rewards/accuracies": 0.875, "rewards/chosen": -0.2111128270626068, "rewards/margins": 0.2560581564903259, "rewards/rejected": -0.46717098355293274, "step": 2593 }, { "epoch": 7.101984941820671, "grad_norm": 3.7706892490386963, "learning_rate": 6.446575342465753e-07, "log_odds_chosen": 1.4444427490234375, "log_odds_ratio": -0.6184214353561401, "logits/chosen": 0.5919376611709595, "logits/rejected": 0.558884859085083, "logps/chosen": -2.0291595458984375, "logps/rejected": -3.3646984100341797, "loss": 0.7938, "nll_loss": 0.7319633960723877, "rewards/accuracies": 0.75, "rewards/chosen": -0.2029159665107727, "rewards/margins": 0.1335539072751999, "rewards/rejected": -0.3364698588848114, "step": 2594 }, { "epoch": 7.104722792607803, "grad_norm": 3.1883535385131836, "learning_rate": 6.445205479452055e-07, "log_odds_chosen": 2.6871681213378906, "log_odds_ratio": -0.20104144513607025, "logits/chosen": 0.9413330554962158, "logits/rejected": 0.9302253127098083, "logps/chosen": -2.1388230323791504, "logps/rejected": -4.596354961395264, "loss": 0.7327, "nll_loss": 0.7125515937805176, "rewards/accuracies": 1.0, "rewards/chosen": -0.21388229727745056, "rewards/margins": 0.2457531839609146, "rewards/rejected": -0.45963549613952637, "step": 2595 }, { "epoch": 7.107460643394935, "grad_norm": 2.695338726043701, "learning_rate": 6.443835616438356e-07, "log_odds_chosen": 2.0238959789276123, "log_odds_ratio": -0.4123022258281708, "logits/chosen": 0.6181990504264832, "logits/rejected": 0.5998414754867554, "logps/chosen": -1.859278917312622, "logps/rejected": -3.61751127243042, "loss": 0.6899, "nll_loss": 0.648647665977478, "rewards/accuracies": 0.875, "rewards/chosen": -0.1859278827905655, "rewards/margins": 0.17582325637340546, "rewards/rejected": -0.36175113916397095, "step": 2596 }, { "epoch": 7.110198494182067, "grad_norm": 2.812223434448242, "learning_rate": 6.442465753424658e-07, "log_odds_chosen": 2.893033266067505, "log_odds_ratio": -0.07460007816553116, "logits/chosen": 0.9650598764419556, "logits/rejected": 0.9665504097938538, "logps/chosen": -1.9546207189559937, "logps/rejected": -4.6571946144104, "loss": 0.586, "nll_loss": 0.5785168409347534, "rewards/accuracies": 1.0, "rewards/chosen": -0.19546207785606384, "rewards/margins": 0.2702573835849762, "rewards/rejected": -0.4657194912433624, "step": 2597 }, { "epoch": 7.112936344969199, "grad_norm": 2.682809352874756, "learning_rate": 6.441095890410959e-07, "log_odds_chosen": 3.4511196613311768, "log_odds_ratio": -0.2213110476732254, "logits/chosen": 0.8321341276168823, "logits/rejected": 0.8431138396263123, "logps/chosen": -2.3009421825408936, "logps/rejected": -5.622912406921387, "loss": 0.8261, "nll_loss": 0.8039296865463257, "rewards/accuracies": 0.75, "rewards/chosen": -0.23009422421455383, "rewards/margins": 0.33219701051712036, "rewards/rejected": -0.5622912645339966, "step": 2598 }, { "epoch": 7.115674195756331, "grad_norm": 4.578598976135254, "learning_rate": 6.43972602739726e-07, "log_odds_chosen": 2.7265233993530273, "log_odds_ratio": -0.4385026693344116, "logits/chosen": 1.1743061542510986, "logits/rejected": 1.2316187620162964, "logps/chosen": -2.4111862182617188, "logps/rejected": -4.989015102386475, "loss": 0.7373, "nll_loss": 0.6934055089950562, "rewards/accuracies": 0.75, "rewards/chosen": -0.2411186546087265, "rewards/margins": 0.25778287649154663, "rewards/rejected": -0.49890148639678955, "step": 2599 }, { "epoch": 7.1184120465434635, "grad_norm": 3.0118091106414795, "learning_rate": 6.438356164383562e-07, "log_odds_chosen": 2.5478434562683105, "log_odds_ratio": -0.18140649795532227, "logits/chosen": 0.8490963578224182, "logits/rejected": 0.8309942483901978, "logps/chosen": -1.273110270500183, "logps/rejected": -3.5632400512695312, "loss": 0.6686, "nll_loss": 0.6504162549972534, "rewards/accuracies": 1.0, "rewards/chosen": -0.12731102108955383, "rewards/margins": 0.22901298105716705, "rewards/rejected": -0.3563240170478821, "step": 2600 }, { "epoch": 7.121149897330596, "grad_norm": 4.00246524810791, "learning_rate": 6.436986301369862e-07, "log_odds_chosen": 4.215998649597168, "log_odds_ratio": -0.5586525201797485, "logits/chosen": 0.8337609171867371, "logits/rejected": 0.8324238657951355, "logps/chosen": -2.013852119445801, "logps/rejected": -6.06315279006958, "loss": 0.6943, "nll_loss": 0.6383886337280273, "rewards/accuracies": 0.875, "rewards/chosen": -0.2013852298259735, "rewards/margins": 0.404930055141449, "rewards/rejected": -0.6063152551651001, "step": 2601 }, { "epoch": 7.123887748117728, "grad_norm": 3.6952126026153564, "learning_rate": 6.435616438356164e-07, "log_odds_chosen": 1.7914752960205078, "log_odds_ratio": -0.25419166684150696, "logits/chosen": 0.9945595860481262, "logits/rejected": 0.9593359231948853, "logps/chosen": -2.0611116886138916, "logps/rejected": -3.637479543685913, "loss": 0.6594, "nll_loss": 0.6340261697769165, "rewards/accuracies": 1.0, "rewards/chosen": -0.20611117780208588, "rewards/margins": 0.15763679146766663, "rewards/rejected": -0.3637479543685913, "step": 2602 }, { "epoch": 7.12662559890486, "grad_norm": 2.825727939605713, "learning_rate": 6.434246575342466e-07, "log_odds_chosen": 1.60237717628479, "log_odds_ratio": -0.2932794690132141, "logits/chosen": 0.7685093879699707, "logits/rejected": 0.7664271593093872, "logps/chosen": -2.062701463699341, "logps/rejected": -3.510458469390869, "loss": 0.649, "nll_loss": 0.6196715831756592, "rewards/accuracies": 1.0, "rewards/chosen": -0.20627017319202423, "rewards/margins": 0.14477568864822388, "rewards/rejected": -0.3510458767414093, "step": 2603 }, { "epoch": 7.129363449691992, "grad_norm": 2.981449604034424, "learning_rate": 6.432876712328767e-07, "log_odds_chosen": 2.7851860523223877, "log_odds_ratio": -0.13615016639232635, "logits/chosen": 0.6778751015663147, "logits/rejected": 0.6184920072555542, "logps/chosen": -1.4544427394866943, "logps/rejected": -4.003599643707275, "loss": 0.6911, "nll_loss": 0.6774735450744629, "rewards/accuracies": 1.0, "rewards/chosen": -0.14544427394866943, "rewards/margins": 0.254915714263916, "rewards/rejected": -0.4003599286079407, "step": 2604 }, { "epoch": 7.132101300479124, "grad_norm": 2.7187089920043945, "learning_rate": 6.431506849315068e-07, "log_odds_chosen": 2.2152581214904785, "log_odds_ratio": -0.23922553658485413, "logits/chosen": 1.0882575511932373, "logits/rejected": 1.0573729276657104, "logps/chosen": -1.8522794246673584, "logps/rejected": -3.919929027557373, "loss": 0.6419, "nll_loss": 0.6180154085159302, "rewards/accuracies": 1.0, "rewards/chosen": -0.18522794544696808, "rewards/margins": 0.20676493644714355, "rewards/rejected": -0.39199286699295044, "step": 2605 }, { "epoch": 7.134839151266256, "grad_norm": 4.493331432342529, "learning_rate": 6.43013698630137e-07, "log_odds_chosen": 0.844180166721344, "log_odds_ratio": -0.4518586993217468, "logits/chosen": 0.779788076877594, "logits/rejected": 0.7834202647209167, "logps/chosen": -2.590923309326172, "logps/rejected": -3.3598103523254395, "loss": 0.7279, "nll_loss": 0.6827616691589355, "rewards/accuracies": 0.75, "rewards/chosen": -0.2590923607349396, "rewards/margins": 0.07688871026039124, "rewards/rejected": -0.3359810709953308, "step": 2606 }, { "epoch": 7.137577002053388, "grad_norm": 2.8805124759674072, "learning_rate": 6.428767123287671e-07, "log_odds_chosen": 1.4456918239593506, "log_odds_ratio": -0.4176623225212097, "logits/chosen": 0.7845702767372131, "logits/rejected": 0.7694344520568848, "logps/chosen": -1.9485435485839844, "logps/rejected": -3.345020294189453, "loss": 0.6569, "nll_loss": 0.6151301860809326, "rewards/accuracies": 0.75, "rewards/chosen": -0.19485436379909515, "rewards/margins": 0.1396476924419403, "rewards/rejected": -0.33450204133987427, "step": 2607 }, { "epoch": 7.14031485284052, "grad_norm": 2.555058002471924, "learning_rate": 6.427397260273972e-07, "log_odds_chosen": 2.6321322917938232, "log_odds_ratio": -0.17262214422225952, "logits/chosen": 0.7349501252174377, "logits/rejected": 0.6571807861328125, "logps/chosen": -1.5994362831115723, "logps/rejected": -4.041302680969238, "loss": 0.6489, "nll_loss": 0.6316730976104736, "rewards/accuracies": 1.0, "rewards/chosen": -0.159943625330925, "rewards/margins": 0.2441866397857666, "rewards/rejected": -0.4041302502155304, "step": 2608 }, { "epoch": 7.143052703627652, "grad_norm": 3.0489537715911865, "learning_rate": 6.426027397260274e-07, "log_odds_chosen": 2.742623805999756, "log_odds_ratio": -0.25019484758377075, "logits/chosen": 0.8505795001983643, "logits/rejected": 0.8341212272644043, "logps/chosen": -2.8902597427368164, "logps/rejected": -5.54059362411499, "loss": 0.8182, "nll_loss": 0.7931706309318542, "rewards/accuracies": 0.875, "rewards/chosen": -0.2890259623527527, "rewards/margins": 0.26503339409828186, "rewards/rejected": -0.5540593862533569, "step": 2609 }, { "epoch": 7.145790554414784, "grad_norm": 4.3301920890808105, "learning_rate": 6.424657534246575e-07, "log_odds_chosen": 1.4554985761642456, "log_odds_ratio": -0.36919844150543213, "logits/chosen": 1.1029956340789795, "logits/rejected": 1.0937907695770264, "logps/chosen": -2.9964733123779297, "logps/rejected": -4.370691776275635, "loss": 0.7653, "nll_loss": 0.7283612489700317, "rewards/accuracies": 1.0, "rewards/chosen": -0.2996473014354706, "rewards/margins": 0.1374218761920929, "rewards/rejected": -0.4370691776275635, "step": 2610 }, { "epoch": 7.148528405201916, "grad_norm": 2.9539945125579834, "learning_rate": 6.423287671232877e-07, "log_odds_chosen": 1.4814246892929077, "log_odds_ratio": -0.33185064792633057, "logits/chosen": 0.7818297147750854, "logits/rejected": 0.7960766553878784, "logps/chosen": -1.6133248805999756, "logps/rejected": -2.968869924545288, "loss": 0.7269, "nll_loss": 0.6936929225921631, "rewards/accuracies": 1.0, "rewards/chosen": -0.16133248805999756, "rewards/margins": 0.1355545073747635, "rewards/rejected": -0.29688698053359985, "step": 2611 }, { "epoch": 7.151266255989048, "grad_norm": 3.7534420490264893, "learning_rate": 6.421917808219178e-07, "log_odds_chosen": 2.263702869415283, "log_odds_ratio": -0.3366076946258545, "logits/chosen": 0.7096863985061646, "logits/rejected": 0.689109206199646, "logps/chosen": -2.3127188682556152, "logps/rejected": -4.450030326843262, "loss": 0.8251, "nll_loss": 0.7914649844169617, "rewards/accuracies": 0.875, "rewards/chosen": -0.231271892786026, "rewards/margins": 0.21373115479946136, "rewards/rejected": -0.44500303268432617, "step": 2612 }, { "epoch": 7.15400410677618, "grad_norm": 2.773693561553955, "learning_rate": 6.420547945205479e-07, "log_odds_chosen": 4.228410720825195, "log_odds_ratio": -0.11773310601711273, "logits/chosen": 1.0953079462051392, "logits/rejected": 1.1379520893096924, "logps/chosen": -1.890347957611084, "logps/rejected": -5.9155426025390625, "loss": 0.5827, "nll_loss": 0.5709410905838013, "rewards/accuracies": 1.0, "rewards/chosen": -0.18903478980064392, "rewards/margins": 0.4025194048881531, "rewards/rejected": -0.5915542244911194, "step": 2613 }, { "epoch": 7.156741957563312, "grad_norm": 3.144141912460327, "learning_rate": 6.419178082191781e-07, "log_odds_chosen": 1.5429946184158325, "log_odds_ratio": -0.2549342215061188, "logits/chosen": 0.7437893152236938, "logits/rejected": 0.6985880136489868, "logps/chosen": -1.2376376390457153, "logps/rejected": -2.5378291606903076, "loss": 0.6264, "nll_loss": 0.6009458303451538, "rewards/accuracies": 1.0, "rewards/chosen": -0.12376376986503601, "rewards/margins": 0.1300191581249237, "rewards/rejected": -0.2537829279899597, "step": 2614 }, { "epoch": 7.1594798083504445, "grad_norm": 2.994230031967163, "learning_rate": 6.417808219178081e-07, "log_odds_chosen": 2.2963674068450928, "log_odds_ratio": -0.2733224332332611, "logits/chosen": 0.9719548225402832, "logits/rejected": 0.9532957673072815, "logps/chosen": -2.3018014430999756, "logps/rejected": -4.459430694580078, "loss": 0.7163, "nll_loss": 0.6890078783035278, "rewards/accuracies": 0.875, "rewards/chosen": -0.23018014430999756, "rewards/margins": 0.2157629132270813, "rewards/rejected": -0.44594305753707886, "step": 2615 }, { "epoch": 7.162217659137577, "grad_norm": 2.7098238468170166, "learning_rate": 6.416438356164383e-07, "log_odds_chosen": 2.316688060760498, "log_odds_ratio": -0.21666428446769714, "logits/chosen": 0.7434964179992676, "logits/rejected": 0.7566842436790466, "logps/chosen": -1.69386625289917, "logps/rejected": -3.8610310554504395, "loss": 0.7298, "nll_loss": 0.7081706523895264, "rewards/accuracies": 1.0, "rewards/chosen": -0.169386625289917, "rewards/margins": 0.21671649813652039, "rewards/rejected": -0.3861031234264374, "step": 2616 }, { "epoch": 7.1649555099247095, "grad_norm": 3.4835238456726074, "learning_rate": 6.415068493150685e-07, "log_odds_chosen": 2.534177541732788, "log_odds_ratio": -0.22164928913116455, "logits/chosen": 0.8868597149848938, "logits/rejected": 0.8764513731002808, "logps/chosen": -2.272094249725342, "logps/rejected": -4.675999641418457, "loss": 0.6138, "nll_loss": 0.5916463136672974, "rewards/accuracies": 0.875, "rewards/chosen": -0.2272094488143921, "rewards/margins": 0.24039050936698914, "rewards/rejected": -0.4675999581813812, "step": 2617 }, { "epoch": 7.167693360711842, "grad_norm": 5.797330856323242, "learning_rate": 6.413698630136986e-07, "log_odds_chosen": 1.3979610204696655, "log_odds_ratio": -0.9721183776855469, "logits/chosen": 0.6058385372161865, "logits/rejected": 0.5883634090423584, "logps/chosen": -2.5310442447662354, "logps/rejected": -3.871757984161377, "loss": 0.755, "nll_loss": 0.6577916145324707, "rewards/accuracies": 0.75, "rewards/chosen": -0.25310444831848145, "rewards/margins": 0.13407135009765625, "rewards/rejected": -0.3871757984161377, "step": 2618 }, { "epoch": 7.170431211498974, "grad_norm": 2.883193016052246, "learning_rate": 6.412328767123287e-07, "log_odds_chosen": 1.233049750328064, "log_odds_ratio": -0.3109009861946106, "logits/chosen": 0.6738506555557251, "logits/rejected": 0.6286941766738892, "logps/chosen": -2.215224504470825, "logps/rejected": -3.332512855529785, "loss": 0.67, "nll_loss": 0.6389502882957458, "rewards/accuracies": 1.0, "rewards/chosen": -0.22152245044708252, "rewards/margins": 0.11172884702682495, "rewards/rejected": -0.33325129747390747, "step": 2619 }, { "epoch": 7.173169062286106, "grad_norm": 6.5941596031188965, "learning_rate": 6.410958904109589e-07, "log_odds_chosen": 0.560505747795105, "log_odds_ratio": -0.7202390432357788, "logits/chosen": 0.5644621253013611, "logits/rejected": 0.48848527669906616, "logps/chosen": -2.2120521068573, "logps/rejected": -2.67753267288208, "loss": 0.832, "nll_loss": 0.7600253820419312, "rewards/accuracies": 0.75, "rewards/chosen": -0.2212052196264267, "rewards/margins": 0.04654805734753609, "rewards/rejected": -0.2677532732486725, "step": 2620 }, { "epoch": 7.175906913073238, "grad_norm": 3.2720296382904053, "learning_rate": 6.40958904109589e-07, "log_odds_chosen": 1.4033691883087158, "log_odds_ratio": -0.3194938898086548, "logits/chosen": 0.5850871801376343, "logits/rejected": 0.4596002399921417, "logps/chosen": -1.4120292663574219, "logps/rejected": -2.6439855098724365, "loss": 0.7275, "nll_loss": 0.6955733299255371, "rewards/accuracies": 0.875, "rewards/chosen": -0.1412029266357422, "rewards/margins": 0.12319562584161758, "rewards/rejected": -0.2643985450267792, "step": 2621 }, { "epoch": 7.17864476386037, "grad_norm": 3.256011486053467, "learning_rate": 6.408219178082191e-07, "log_odds_chosen": 1.5256422758102417, "log_odds_ratio": -0.2920987010002136, "logits/chosen": 0.7935186624526978, "logits/rejected": 0.7451812624931335, "logps/chosen": -1.4356961250305176, "logps/rejected": -2.764838695526123, "loss": 0.6222, "nll_loss": 0.5930361747741699, "rewards/accuracies": 1.0, "rewards/chosen": -0.14356961846351624, "rewards/margins": 0.13291427493095398, "rewards/rejected": -0.2764838933944702, "step": 2622 }, { "epoch": 7.181382614647502, "grad_norm": 3.2631642818450928, "learning_rate": 6.406849315068493e-07, "log_odds_chosen": 3.8140053749084473, "log_odds_ratio": -0.2150164395570755, "logits/chosen": 0.8354145288467407, "logits/rejected": 0.8100240230560303, "logps/chosen": -1.9005283117294312, "logps/rejected": -5.543715953826904, "loss": 0.8157, "nll_loss": 0.7941892147064209, "rewards/accuracies": 1.0, "rewards/chosen": -0.1900528371334076, "rewards/margins": 0.36431875824928284, "rewards/rejected": -0.5543715953826904, "step": 2623 }, { "epoch": 7.184120465434634, "grad_norm": 3.2554266452789307, "learning_rate": 6.405479452054794e-07, "log_odds_chosen": 1.8428447246551514, "log_odds_ratio": -0.27341780066490173, "logits/chosen": 1.0446527004241943, "logits/rejected": 1.107686161994934, "logps/chosen": -1.8770381212234497, "logps/rejected": -3.549480438232422, "loss": 0.6094, "nll_loss": 0.5820862054824829, "rewards/accuracies": 1.0, "rewards/chosen": -0.18770381808280945, "rewards/margins": 0.16724422574043274, "rewards/rejected": -0.3549480438232422, "step": 2624 }, { "epoch": 7.186858316221766, "grad_norm": 2.835498332977295, "learning_rate": 6.404109589041096e-07, "log_odds_chosen": 2.767726182937622, "log_odds_ratio": -0.25034016370773315, "logits/chosen": 0.8539746999740601, "logits/rejected": 0.7995690107345581, "logps/chosen": -1.8370907306671143, "logps/rejected": -4.451162338256836, "loss": 0.6681, "nll_loss": 0.6430396437644958, "rewards/accuracies": 0.875, "rewards/chosen": -0.1837090700864792, "rewards/margins": 0.26140716671943665, "rewards/rejected": -0.44511622190475464, "step": 2625 }, { "epoch": 7.189596167008898, "grad_norm": 2.963895320892334, "learning_rate": 6.402739726027397e-07, "log_odds_chosen": 3.413461685180664, "log_odds_ratio": -0.12663979828357697, "logits/chosen": 0.808208703994751, "logits/rejected": 0.8147597312927246, "logps/chosen": -2.335446834564209, "logps/rejected": -5.525978088378906, "loss": 0.7454, "nll_loss": 0.732694149017334, "rewards/accuracies": 1.0, "rewards/chosen": -0.23354467749595642, "rewards/margins": 0.31905314326286316, "rewards/rejected": -0.5525978207588196, "step": 2626 }, { "epoch": 7.19233401779603, "grad_norm": 3.1157989501953125, "learning_rate": 6.401369863013698e-07, "log_odds_chosen": 1.8637206554412842, "log_odds_ratio": -0.24247294664382935, "logits/chosen": 0.8445740938186646, "logits/rejected": 0.837651252746582, "logps/chosen": -1.918182611465454, "logps/rejected": -3.621033191680908, "loss": 0.7245, "nll_loss": 0.7002595663070679, "rewards/accuracies": 0.875, "rewards/chosen": -0.1918182671070099, "rewards/margins": 0.17028504610061646, "rewards/rejected": -0.36210331320762634, "step": 2627 }, { "epoch": 7.195071868583162, "grad_norm": 3.1915605068206787, "learning_rate": 6.4e-07, "log_odds_chosen": 1.1104921102523804, "log_odds_ratio": -0.35189688205718994, "logits/chosen": 0.8684091567993164, "logits/rejected": 0.8748248219490051, "logps/chosen": -2.018620252609253, "logps/rejected": -3.0247159004211426, "loss": 0.6732, "nll_loss": 0.6379769444465637, "rewards/accuracies": 1.0, "rewards/chosen": -0.20186203718185425, "rewards/margins": 0.10060955584049225, "rewards/rejected": -0.3024715781211853, "step": 2628 }, { "epoch": 7.197809719370294, "grad_norm": 3.411555051803589, "learning_rate": 6.398630136986301e-07, "log_odds_chosen": 3.139659881591797, "log_odds_ratio": -0.2028358280658722, "logits/chosen": 0.9693107604980469, "logits/rejected": 0.984028697013855, "logps/chosen": -2.079197406768799, "logps/rejected": -5.092229843139648, "loss": 0.7211, "nll_loss": 0.7008017301559448, "rewards/accuracies": 1.0, "rewards/chosen": -0.20791974663734436, "rewards/margins": 0.3013032078742981, "rewards/rejected": -0.5092229843139648, "step": 2629 }, { "epoch": 7.200547570157426, "grad_norm": 2.874650478363037, "learning_rate": 6.397260273972602e-07, "log_odds_chosen": 1.6600422859191895, "log_odds_ratio": -0.26943671703338623, "logits/chosen": 0.5776413679122925, "logits/rejected": 0.5528252124786377, "logps/chosen": -1.593819499015808, "logps/rejected": -3.0916872024536133, "loss": 0.6607, "nll_loss": 0.6337758302688599, "rewards/accuracies": 1.0, "rewards/chosen": -0.15938197076320648, "rewards/margins": 0.14978677034378052, "rewards/rejected": -0.3091687560081482, "step": 2630 }, { "epoch": 7.2032854209445585, "grad_norm": 2.776008129119873, "learning_rate": 6.395890410958904e-07, "log_odds_chosen": 2.240807056427002, "log_odds_ratio": -0.22762525081634521, "logits/chosen": 0.7267009615898132, "logits/rejected": 0.7000349164009094, "logps/chosen": -1.806056022644043, "logps/rejected": -3.866434097290039, "loss": 0.7394, "nll_loss": 0.7166831493377686, "rewards/accuracies": 1.0, "rewards/chosen": -0.18060562014579773, "rewards/margins": 0.20603778958320618, "rewards/rejected": -0.3866433799266815, "step": 2631 }, { "epoch": 7.2060232717316905, "grad_norm": 4.648251533508301, "learning_rate": 6.394520547945205e-07, "log_odds_chosen": 0.8148216605186462, "log_odds_ratio": -0.5751414895057678, "logits/chosen": 0.9051641225814819, "logits/rejected": 0.8154088258743286, "logps/chosen": -2.6110353469848633, "logps/rejected": -3.364985942840576, "loss": 0.7917, "nll_loss": 0.7342036962509155, "rewards/accuracies": 0.75, "rewards/chosen": -0.2611035406589508, "rewards/margins": 0.07539506256580353, "rewards/rejected": -0.33649858832359314, "step": 2632 }, { "epoch": 7.208761122518823, "grad_norm": 2.867136001586914, "learning_rate": 6.393150684931506e-07, "log_odds_chosen": 1.844198226928711, "log_odds_ratio": -0.3043220043182373, "logits/chosen": 0.6703490018844604, "logits/rejected": 0.6244650483131409, "logps/chosen": -1.8439571857452393, "logps/rejected": -3.5774528980255127, "loss": 0.7882, "nll_loss": 0.7577354907989502, "rewards/accuracies": 0.875, "rewards/chosen": -0.1843957155942917, "rewards/margins": 0.17334961891174316, "rewards/rejected": -0.35774528980255127, "step": 2633 }, { "epoch": 7.211498973305955, "grad_norm": 3.121553659439087, "learning_rate": 6.391780821917808e-07, "log_odds_chosen": 1.4602919816970825, "log_odds_ratio": -0.3652629852294922, "logits/chosen": 0.890791654586792, "logits/rejected": 0.8944149017333984, "logps/chosen": -1.8486230373382568, "logps/rejected": -3.16304349899292, "loss": 0.621, "nll_loss": 0.584506094455719, "rewards/accuracies": 0.875, "rewards/chosen": -0.18486231565475464, "rewards/margins": 0.13144204020500183, "rewards/rejected": -0.31630435585975647, "step": 2634 }, { "epoch": 7.214236824093087, "grad_norm": 2.817512035369873, "learning_rate": 6.390410958904109e-07, "log_odds_chosen": 3.2466869354248047, "log_odds_ratio": -0.13372240960597992, "logits/chosen": 0.9606503248214722, "logits/rejected": 0.9977820515632629, "logps/chosen": -1.7482346296310425, "logps/rejected": -4.822875022888184, "loss": 0.5588, "nll_loss": 0.5453833937644958, "rewards/accuracies": 1.0, "rewards/chosen": -0.17482344806194305, "rewards/margins": 0.307464063167572, "rewards/rejected": -0.48228752613067627, "step": 2635 }, { "epoch": 7.216974674880219, "grad_norm": 3.2265467643737793, "learning_rate": 6.38904109589041e-07, "log_odds_chosen": 1.9963711500167847, "log_odds_ratio": -0.31984102725982666, "logits/chosen": 0.7805882692337036, "logits/rejected": 0.5894249677658081, "logps/chosen": -1.500317096710205, "logps/rejected": -3.361187219619751, "loss": 0.745, "nll_loss": 0.713019073009491, "rewards/accuracies": 1.0, "rewards/chosen": -0.15003171563148499, "rewards/margins": 0.1860870122909546, "rewards/rejected": -0.33611875772476196, "step": 2636 }, { "epoch": 7.219712525667351, "grad_norm": 3.8269495964050293, "learning_rate": 6.387671232876712e-07, "log_odds_chosen": 1.5137581825256348, "log_odds_ratio": -0.43962666392326355, "logits/chosen": 0.6650204658508301, "logits/rejected": 0.6511836051940918, "logps/chosen": -1.7988985776901245, "logps/rejected": -3.1497344970703125, "loss": 0.7235, "nll_loss": 0.6795066595077515, "rewards/accuracies": 0.75, "rewards/chosen": -0.17988987267017365, "rewards/margins": 0.13508358597755432, "rewards/rejected": -0.3149734437465668, "step": 2637 }, { "epoch": 7.222450376454483, "grad_norm": 3.3681862354278564, "learning_rate": 6.386301369863013e-07, "log_odds_chosen": 1.8934674263000488, "log_odds_ratio": -0.35563039779663086, "logits/chosen": 0.7458074688911438, "logits/rejected": 0.6807077527046204, "logps/chosen": -1.7192474603652954, "logps/rejected": -3.4900238513946533, "loss": 0.7217, "nll_loss": 0.686170220375061, "rewards/accuracies": 1.0, "rewards/chosen": -0.17192474007606506, "rewards/margins": 0.17707766592502594, "rewards/rejected": -0.3490024209022522, "step": 2638 }, { "epoch": 7.225188227241615, "grad_norm": 3.2685530185699463, "learning_rate": 6.384931506849315e-07, "log_odds_chosen": 1.2637829780578613, "log_odds_ratio": -0.32783210277557373, "logits/chosen": 0.7839583158493042, "logits/rejected": 0.8084722757339478, "logps/chosen": -2.4213428497314453, "logps/rejected": -3.568708658218384, "loss": 0.6542, "nll_loss": 0.621380627155304, "rewards/accuracies": 0.875, "rewards/chosen": -0.24213427305221558, "rewards/margins": 0.11473655700683594, "rewards/rejected": -0.3568708300590515, "step": 2639 }, { "epoch": 7.227926078028747, "grad_norm": 4.060766696929932, "learning_rate": 6.383561643835616e-07, "log_odds_chosen": 1.290898323059082, "log_odds_ratio": -0.33704230189323425, "logits/chosen": 0.7926896810531616, "logits/rejected": 0.7733604907989502, "logps/chosen": -2.110152244567871, "logps/rejected": -3.2669217586517334, "loss": 0.7662, "nll_loss": 0.7325173020362854, "rewards/accuracies": 1.0, "rewards/chosen": -0.2110152244567871, "rewards/margins": 0.11567696928977966, "rewards/rejected": -0.3266921937465668, "step": 2640 }, { "epoch": 7.230663928815879, "grad_norm": 7.39113187789917, "learning_rate": 6.382191780821917e-07, "log_odds_chosen": 1.3041388988494873, "log_odds_ratio": -0.4334013760089874, "logits/chosen": 0.9041983485221863, "logits/rejected": 0.9355789422988892, "logps/chosen": -2.3375601768493652, "logps/rejected": -3.4991455078125, "loss": 0.6599, "nll_loss": 0.616599440574646, "rewards/accuracies": 0.875, "rewards/chosen": -0.23375600576400757, "rewards/margins": 0.11615853756666183, "rewards/rejected": -0.34991455078125, "step": 2641 }, { "epoch": 7.233401779603011, "grad_norm": 2.948723793029785, "learning_rate": 6.380821917808219e-07, "log_odds_chosen": 3.256706714630127, "log_odds_ratio": -0.21969036757946014, "logits/chosen": 0.9754055738449097, "logits/rejected": 0.9752840995788574, "logps/chosen": -1.964957356452942, "logps/rejected": -5.078287601470947, "loss": 0.7427, "nll_loss": 0.7207326889038086, "rewards/accuracies": 0.875, "rewards/chosen": -0.19649574160575867, "rewards/margins": 0.3113330006599426, "rewards/rejected": -0.5078287124633789, "step": 2642 }, { "epoch": 7.236139630390143, "grad_norm": 3.591742992401123, "learning_rate": 6.37945205479452e-07, "log_odds_chosen": 2.3021373748779297, "log_odds_ratio": -0.29960018396377563, "logits/chosen": 0.7067235708236694, "logits/rejected": 0.6764968633651733, "logps/chosen": -2.005199909210205, "logps/rejected": -4.16650915145874, "loss": 0.7471, "nll_loss": 0.7171808481216431, "rewards/accuracies": 0.75, "rewards/chosen": -0.20051997900009155, "rewards/margins": 0.21613094210624695, "rewards/rejected": -0.4166509211063385, "step": 2643 }, { "epoch": 7.238877481177276, "grad_norm": 2.5676655769348145, "learning_rate": 6.378082191780821e-07, "log_odds_chosen": 3.6450936794281006, "log_odds_ratio": -0.15885992348194122, "logits/chosen": 0.869581401348114, "logits/rejected": 0.8273764848709106, "logps/chosen": -1.5016148090362549, "logps/rejected": -4.925845623016357, "loss": 0.6195, "nll_loss": 0.6035888195037842, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501614898443222, "rewards/margins": 0.34242305159568787, "rewards/rejected": -0.49258458614349365, "step": 2644 }, { "epoch": 7.241615331964408, "grad_norm": 2.5373709201812744, "learning_rate": 6.376712328767123e-07, "log_odds_chosen": 3.4354875087738037, "log_odds_ratio": -0.284254789352417, "logits/chosen": 0.9157578945159912, "logits/rejected": 0.8590763807296753, "logps/chosen": -1.7070664167404175, "logps/rejected": -5.004787445068359, "loss": 0.6482, "nll_loss": 0.6197460889816284, "rewards/accuracies": 1.0, "rewards/chosen": -0.17070665955543518, "rewards/margins": 0.3297721743583679, "rewards/rejected": -0.5004788041114807, "step": 2645 }, { "epoch": 7.24435318275154, "grad_norm": 3.415017604827881, "learning_rate": 6.375342465753424e-07, "log_odds_chosen": 2.108403205871582, "log_odds_ratio": -0.3244434595108032, "logits/chosen": 0.7994186282157898, "logits/rejected": 0.7866486310958862, "logps/chosen": -2.31184458732605, "logps/rejected": -4.315509796142578, "loss": 0.7723, "nll_loss": 0.7398866415023804, "rewards/accuracies": 0.875, "rewards/chosen": -0.2311844676733017, "rewards/margins": 0.2003665566444397, "rewards/rejected": -0.4315509796142578, "step": 2646 }, { "epoch": 7.2470910335386725, "grad_norm": 3.2883851528167725, "learning_rate": 6.373972602739725e-07, "log_odds_chosen": 3.017313003540039, "log_odds_ratio": -0.18523405492305756, "logits/chosen": 0.8843082189559937, "logits/rejected": 0.8722001314163208, "logps/chosen": -2.585181951522827, "logps/rejected": -5.436867713928223, "loss": 0.6795, "nll_loss": 0.6609604358673096, "rewards/accuracies": 1.0, "rewards/chosen": -0.2585182189941406, "rewards/margins": 0.2851685583591461, "rewards/rejected": -0.5436868071556091, "step": 2647 }, { "epoch": 7.2498288843258045, "grad_norm": 3.1370279788970947, "learning_rate": 6.372602739726027e-07, "log_odds_chosen": 2.1900525093078613, "log_odds_ratio": -0.2571347951889038, "logits/chosen": 0.7462904453277588, "logits/rejected": 0.6634770631790161, "logps/chosen": -1.6839020252227783, "logps/rejected": -3.6958892345428467, "loss": 0.6496, "nll_loss": 0.623863160610199, "rewards/accuracies": 0.875, "rewards/chosen": -0.1683901995420456, "rewards/margins": 0.20119871199131012, "rewards/rejected": -0.3695888817310333, "step": 2648 }, { "epoch": 7.252566735112937, "grad_norm": 5.241602420806885, "learning_rate": 6.371232876712328e-07, "log_odds_chosen": 2.6315560340881348, "log_odds_ratio": -0.21290235221385956, "logits/chosen": 0.6934149265289307, "logits/rejected": 0.6765881180763245, "logps/chosen": -2.225884437561035, "logps/rejected": -4.578583717346191, "loss": 0.7106, "nll_loss": 0.6892758011817932, "rewards/accuracies": 0.875, "rewards/chosen": -0.222588449716568, "rewards/margins": 0.2352699637413025, "rewards/rejected": -0.4578584134578705, "step": 2649 }, { "epoch": 7.255304585900069, "grad_norm": 3.082601547241211, "learning_rate": 6.369863013698629e-07, "log_odds_chosen": 3.379769802093506, "log_odds_ratio": -0.20388709008693695, "logits/chosen": 0.5718228816986084, "logits/rejected": 0.5456352233886719, "logps/chosen": -1.986159086227417, "logps/rejected": -5.136564254760742, "loss": 0.778, "nll_loss": 0.7575803995132446, "rewards/accuracies": 1.0, "rewards/chosen": -0.1986159086227417, "rewards/margins": 0.31504055857658386, "rewards/rejected": -0.5136564373970032, "step": 2650 }, { "epoch": 7.258042436687201, "grad_norm": 3.170603036880493, "learning_rate": 6.368493150684931e-07, "log_odds_chosen": 4.258889198303223, "log_odds_ratio": -0.07104705274105072, "logits/chosen": 1.0187060832977295, "logits/rejected": 1.0488674640655518, "logps/chosen": -2.052192449569702, "logps/rejected": -6.105460166931152, "loss": 0.6485, "nll_loss": 0.6414361000061035, "rewards/accuracies": 1.0, "rewards/chosen": -0.20521925389766693, "rewards/margins": 0.4053267240524292, "rewards/rejected": -0.6105459332466125, "step": 2651 }, { "epoch": 7.260780287474333, "grad_norm": 2.8013832569122314, "learning_rate": 6.367123287671232e-07, "log_odds_chosen": 3.211946725845337, "log_odds_ratio": -0.2543405294418335, "logits/chosen": 0.7503865957260132, "logits/rejected": 0.7544170618057251, "logps/chosen": -2.2409098148345947, "logps/rejected": -5.31826114654541, "loss": 0.7348, "nll_loss": 0.7093506455421448, "rewards/accuracies": 0.875, "rewards/chosen": -0.22409097850322723, "rewards/margins": 0.3077351450920105, "rewards/rejected": -0.5318261384963989, "step": 2652 }, { "epoch": 7.263518138261465, "grad_norm": 2.952150583267212, "learning_rate": 6.365753424657534e-07, "log_odds_chosen": 2.1065351963043213, "log_odds_ratio": -0.28882336616516113, "logits/chosen": 0.8771407008171082, "logits/rejected": 0.7770450711250305, "logps/chosen": -1.3788042068481445, "logps/rejected": -3.2738656997680664, "loss": 0.652, "nll_loss": 0.6230873465538025, "rewards/accuracies": 1.0, "rewards/chosen": -0.13788042962551117, "rewards/margins": 0.1895061433315277, "rewards/rejected": -0.3273865878582001, "step": 2653 }, { "epoch": 7.266255989048597, "grad_norm": 2.7366392612457275, "learning_rate": 6.364383561643835e-07, "log_odds_chosen": 1.5123099088668823, "log_odds_ratio": -0.30715668201446533, "logits/chosen": 0.5896280407905579, "logits/rejected": 0.5573617815971375, "logps/chosen": -2.4331777095794678, "logps/rejected": -3.8865432739257812, "loss": 0.7658, "nll_loss": 0.7350743412971497, "rewards/accuracies": 0.75, "rewards/chosen": -0.24331776797771454, "rewards/margins": 0.14533653855323792, "rewards/rejected": -0.38865432143211365, "step": 2654 }, { "epoch": 7.268993839835729, "grad_norm": 2.906214475631714, "learning_rate": 6.363013698630136e-07, "log_odds_chosen": 1.3869227170944214, "log_odds_ratio": -0.4126565158367157, "logits/chosen": 0.7455445528030396, "logits/rejected": 0.5802274942398071, "logps/chosen": -1.498694896697998, "logps/rejected": -2.7389001846313477, "loss": 0.7326, "nll_loss": 0.6912921667098999, "rewards/accuracies": 0.75, "rewards/chosen": -0.14986948668956757, "rewards/margins": 0.12402055412530899, "rewards/rejected": -0.27389001846313477, "step": 2655 }, { "epoch": 7.271731690622861, "grad_norm": 3.8086330890655518, "learning_rate": 6.361643835616438e-07, "log_odds_chosen": 3.218587875366211, "log_odds_ratio": -0.14635901153087616, "logits/chosen": 1.0104577541351318, "logits/rejected": 0.9342622756958008, "logps/chosen": -2.1840293407440186, "logps/rejected": -5.144737243652344, "loss": 0.619, "nll_loss": 0.6043646335601807, "rewards/accuracies": 1.0, "rewards/chosen": -0.2184029519557953, "rewards/margins": 0.29607075452804565, "rewards/rejected": -0.5144736766815186, "step": 2656 }, { "epoch": 7.274469541409993, "grad_norm": 3.0603106021881104, "learning_rate": 6.36027397260274e-07, "log_odds_chosen": 2.900869846343994, "log_odds_ratio": -0.18639235198497772, "logits/chosen": 0.847927987575531, "logits/rejected": 0.8141279816627502, "logps/chosen": -2.486370086669922, "logps/rejected": -5.215342998504639, "loss": 0.8002, "nll_loss": 0.7815966606140137, "rewards/accuracies": 1.0, "rewards/chosen": -0.24863699078559875, "rewards/margins": 0.27289730310440063, "rewards/rejected": -0.5215343832969666, "step": 2657 }, { "epoch": 7.277207392197125, "grad_norm": 2.7649519443511963, "learning_rate": 6.35890410958904e-07, "log_odds_chosen": 2.2418739795684814, "log_odds_ratio": -0.24032209813594818, "logits/chosen": 0.5974216461181641, "logits/rejected": 0.6000977158546448, "logps/chosen": -1.5883634090423584, "logps/rejected": -3.6853184700012207, "loss": 0.5953, "nll_loss": 0.5712280869483948, "rewards/accuracies": 0.875, "rewards/chosen": -0.15883633494377136, "rewards/margins": 0.209695503115654, "rewards/rejected": -0.36853188276290894, "step": 2658 }, { "epoch": 7.279945242984257, "grad_norm": 2.365502119064331, "learning_rate": 6.357534246575343e-07, "log_odds_chosen": 2.9498202800750732, "log_odds_ratio": -0.18225282430648804, "logits/chosen": 1.0377273559570312, "logits/rejected": 1.008521318435669, "logps/chosen": -1.7776509523391724, "logps/rejected": -4.568700313568115, "loss": 0.6618, "nll_loss": 0.643613338470459, "rewards/accuracies": 1.0, "rewards/chosen": -0.1777651011943817, "rewards/margins": 0.27910494804382324, "rewards/rejected": -0.45687004923820496, "step": 2659 }, { "epoch": 7.282683093771389, "grad_norm": 4.171314716339111, "learning_rate": 6.356164383561645e-07, "log_odds_chosen": 1.7849664688110352, "log_odds_ratio": -0.2646928131580353, "logits/chosen": 1.0133144855499268, "logits/rejected": 1.0127615928649902, "logps/chosen": -2.364053964614868, "logps/rejected": -3.962726354598999, "loss": 0.74, "nll_loss": 0.7135152816772461, "rewards/accuracies": 0.875, "rewards/chosen": -0.2364054024219513, "rewards/margins": 0.15986722707748413, "rewards/rejected": -0.3962726294994354, "step": 2660 }, { "epoch": 7.285420944558521, "grad_norm": 4.253516674041748, "learning_rate": 6.354794520547944e-07, "log_odds_chosen": 1.8736963272094727, "log_odds_ratio": -0.5789385437965393, "logits/chosen": 0.9949178099632263, "logits/rejected": 0.988019585609436, "logps/chosen": -2.6617894172668457, "logps/rejected": -4.477935791015625, "loss": 0.7484, "nll_loss": 0.6904733180999756, "rewards/accuracies": 0.875, "rewards/chosen": -0.2661789655685425, "rewards/margins": 0.18161459267139435, "rewards/rejected": -0.44779354333877563, "step": 2661 }, { "epoch": 7.2881587953456535, "grad_norm": 4.305246829986572, "learning_rate": 6.353424657534247e-07, "log_odds_chosen": 1.2840540409088135, "log_odds_ratio": -0.39038556814193726, "logits/chosen": 1.0915194749832153, "logits/rejected": 1.031149983406067, "logps/chosen": -2.5271377563476562, "logps/rejected": -3.7313544750213623, "loss": 0.7279, "nll_loss": 0.6888371706008911, "rewards/accuracies": 0.75, "rewards/chosen": -0.25271379947662354, "rewards/margins": 0.12042167782783508, "rewards/rejected": -0.37313544750213623, "step": 2662 }, { "epoch": 7.2908966461327855, "grad_norm": 2.7982699871063232, "learning_rate": 6.352054794520548e-07, "log_odds_chosen": 2.9975039958953857, "log_odds_ratio": -0.15319833159446716, "logits/chosen": 1.0816905498504639, "logits/rejected": 1.067686915397644, "logps/chosen": -2.186680793762207, "logps/rejected": -5.063628196716309, "loss": 0.7429, "nll_loss": 0.7275676727294922, "rewards/accuracies": 1.0, "rewards/chosen": -0.21866807341575623, "rewards/margins": 0.2876947224140167, "rewards/rejected": -0.506362795829773, "step": 2663 }, { "epoch": 7.293634496919918, "grad_norm": 8.710256576538086, "learning_rate": 6.350684931506849e-07, "log_odds_chosen": 0.6778941750526428, "log_odds_ratio": -0.8364704251289368, "logits/chosen": 0.6557959318161011, "logits/rejected": 0.6032747030258179, "logps/chosen": -2.751021146774292, "logps/rejected": -3.3010213375091553, "loss": 0.7891, "nll_loss": 0.7054624557495117, "rewards/accuracies": 0.75, "rewards/chosen": -0.2751021385192871, "rewards/margins": 0.05500001460313797, "rewards/rejected": -0.3301021456718445, "step": 2664 }, { "epoch": 7.29637234770705, "grad_norm": 3.3161613941192627, "learning_rate": 6.349315068493151e-07, "log_odds_chosen": 1.7534775733947754, "log_odds_ratio": -0.25119051337242126, "logits/chosen": 0.8175508379936218, "logits/rejected": 0.7962138056755066, "logps/chosen": -2.843517541885376, "logps/rejected": -4.528446674346924, "loss": 0.7791, "nll_loss": 0.7539519667625427, "rewards/accuracies": 1.0, "rewards/chosen": -0.28435173630714417, "rewards/margins": 0.16849292814731598, "rewards/rejected": -0.45284467935562134, "step": 2665 }, { "epoch": 7.299110198494182, "grad_norm": 3.119831085205078, "learning_rate": 6.347945205479452e-07, "log_odds_chosen": 2.8742847442626953, "log_odds_ratio": -0.21266508102416992, "logits/chosen": 0.5023027062416077, "logits/rejected": 0.4513327479362488, "logps/chosen": -1.4024417400360107, "logps/rejected": -4.021310329437256, "loss": 0.6793, "nll_loss": 0.6579980254173279, "rewards/accuracies": 1.0, "rewards/chosen": -0.14024417102336884, "rewards/margins": 0.2618868947029114, "rewards/rejected": -0.402131050825119, "step": 2666 }, { "epoch": 7.301848049281314, "grad_norm": 3.8531055450439453, "learning_rate": 6.346575342465754e-07, "log_odds_chosen": 1.8493685722351074, "log_odds_ratio": -0.2716739773750305, "logits/chosen": 1.159318208694458, "logits/rejected": 1.1388397216796875, "logps/chosen": -2.2653164863586426, "logps/rejected": -4.010992527008057, "loss": 0.6394, "nll_loss": 0.6122015714645386, "rewards/accuracies": 0.875, "rewards/chosen": -0.22653166949748993, "rewards/margins": 0.17456761002540588, "rewards/rejected": -0.4010992646217346, "step": 2667 }, { "epoch": 7.304585900068446, "grad_norm": 3.7840423583984375, "learning_rate": 6.345205479452055e-07, "log_odds_chosen": 0.8753718137741089, "log_odds_ratio": -0.37369513511657715, "logits/chosen": 0.8046852946281433, "logits/rejected": 0.7523442506790161, "logps/chosen": -1.854866623878479, "logps/rejected": -2.569037437438965, "loss": 0.6742, "nll_loss": 0.6368092894554138, "rewards/accuracies": 0.875, "rewards/chosen": -0.18548665940761566, "rewards/margins": 0.07141707837581635, "rewards/rejected": -0.256903737783432, "step": 2668 }, { "epoch": 7.307323750855579, "grad_norm": 3.822531223297119, "learning_rate": 6.343835616438356e-07, "log_odds_chosen": 2.6513657569885254, "log_odds_ratio": -0.3807096779346466, "logits/chosen": 1.028132677078247, "logits/rejected": 1.0313962697982788, "logps/chosen": -1.9700454473495483, "logps/rejected": -4.474609851837158, "loss": 0.7056, "nll_loss": 0.6675136685371399, "rewards/accuracies": 0.875, "rewards/chosen": -0.1970045566558838, "rewards/margins": 0.25045645236968994, "rewards/rejected": -0.44746100902557373, "step": 2669 }, { "epoch": 7.31006160164271, "grad_norm": 3.0086824893951416, "learning_rate": 6.342465753424658e-07, "log_odds_chosen": 2.5398669242858887, "log_odds_ratio": -0.25124314427375793, "logits/chosen": 0.8233640193939209, "logits/rejected": 0.842240035533905, "logps/chosen": -1.8486359119415283, "logps/rejected": -4.248083591461182, "loss": 0.6916, "nll_loss": 0.666493833065033, "rewards/accuracies": 1.0, "rewards/chosen": -0.1848635971546173, "rewards/margins": 0.23994478583335876, "rewards/rejected": -0.4248083531856537, "step": 2670 }, { "epoch": 7.312799452429843, "grad_norm": 3.316347599029541, "learning_rate": 6.341095890410959e-07, "log_odds_chosen": 1.127347469329834, "log_odds_ratio": -0.4397130310535431, "logits/chosen": 0.881549596786499, "logits/rejected": 0.8332964777946472, "logps/chosen": -1.8705775737762451, "logps/rejected": -2.8536012172698975, "loss": 0.6526, "nll_loss": 0.6086410284042358, "rewards/accuracies": 0.875, "rewards/chosen": -0.187057763338089, "rewards/margins": 0.09830236434936523, "rewards/rejected": -0.2853601276874542, "step": 2671 }, { "epoch": 7.315537303216975, "grad_norm": 3.1818747520446777, "learning_rate": 6.33972602739726e-07, "log_odds_chosen": 2.127732515335083, "log_odds_ratio": -0.2577299177646637, "logits/chosen": 0.790068507194519, "logits/rejected": 0.7725099325180054, "logps/chosen": -1.7544331550598145, "logps/rejected": -3.6196818351745605, "loss": 0.6201, "nll_loss": 0.5943113565444946, "rewards/accuracies": 0.875, "rewards/chosen": -0.17544332146644592, "rewards/margins": 0.1865248829126358, "rewards/rejected": -0.36196818947792053, "step": 2672 }, { "epoch": 7.318275154004107, "grad_norm": 3.9262828826904297, "learning_rate": 6.338356164383562e-07, "log_odds_chosen": 1.126488208770752, "log_odds_ratio": -0.3184780478477478, "logits/chosen": 0.532749593257904, "logits/rejected": 0.5290780067443848, "logps/chosen": -2.1476082801818848, "logps/rejected": -3.085453748703003, "loss": 0.7944, "nll_loss": 0.7625773549079895, "rewards/accuracies": 1.0, "rewards/chosen": -0.21476082503795624, "rewards/margins": 0.09378456324338913, "rewards/rejected": -0.30854538083076477, "step": 2673 }, { "epoch": 7.321013004791239, "grad_norm": 4.133833885192871, "learning_rate": 6.336986301369864e-07, "log_odds_chosen": 2.356872797012329, "log_odds_ratio": -0.4306510388851166, "logits/chosen": 0.8879326581954956, "logits/rejected": 0.9446818232536316, "logps/chosen": -2.0836217403411865, "logps/rejected": -4.306565284729004, "loss": 0.6526, "nll_loss": 0.6094892621040344, "rewards/accuracies": 0.875, "rewards/chosen": -0.20836219191551208, "rewards/margins": 0.22229433059692383, "rewards/rejected": -0.4306564927101135, "step": 2674 }, { "epoch": 7.323750855578371, "grad_norm": 4.893839359283447, "learning_rate": 6.335616438356164e-07, "log_odds_chosen": 1.9424359798431396, "log_odds_ratio": -0.6679455041885376, "logits/chosen": 1.3526067733764648, "logits/rejected": 1.3500139713287354, "logps/chosen": -3.198589324951172, "logps/rejected": -5.001076698303223, "loss": 0.7493, "nll_loss": 0.6824663877487183, "rewards/accuracies": 0.875, "rewards/chosen": -0.31985893845558167, "rewards/margins": 0.18024872243404388, "rewards/rejected": -0.5001076459884644, "step": 2675 }, { "epoch": 7.326488706365503, "grad_norm": 3.0534234046936035, "learning_rate": 6.334246575342466e-07, "log_odds_chosen": 1.9617074728012085, "log_odds_ratio": -0.32468897104263306, "logits/chosen": 0.8464595079421997, "logits/rejected": 0.8963078260421753, "logps/chosen": -2.352342128753662, "logps/rejected": -4.269919395446777, "loss": 0.6695, "nll_loss": 0.6369913220405579, "rewards/accuracies": 0.75, "rewards/chosen": -0.23523421585559845, "rewards/margins": 0.1917577087879181, "rewards/rejected": -0.42699193954467773, "step": 2676 }, { "epoch": 7.329226557152635, "grad_norm": 3.711951732635498, "learning_rate": 6.332876712328767e-07, "log_odds_chosen": 1.7026172876358032, "log_odds_ratio": -0.4328032433986664, "logits/chosen": 0.9789982438087463, "logits/rejected": 0.9353796243667603, "logps/chosen": -1.9010356664657593, "logps/rejected": -3.5008654594421387, "loss": 0.701, "nll_loss": 0.6577049493789673, "rewards/accuracies": 0.75, "rewards/chosen": -0.19010356068611145, "rewards/margins": 0.15998300909996033, "rewards/rejected": -0.3500865697860718, "step": 2677 }, { "epoch": 7.3319644079397674, "grad_norm": 4.1688232421875, "learning_rate": 6.331506849315068e-07, "log_odds_chosen": 2.305673122406006, "log_odds_ratio": -0.4130787253379822, "logits/chosen": 0.8330562114715576, "logits/rejected": 0.8300226926803589, "logps/chosen": -2.103726387023926, "logps/rejected": -4.297730445861816, "loss": 0.6572, "nll_loss": 0.6159246563911438, "rewards/accuracies": 0.75, "rewards/chosen": -0.210372656583786, "rewards/margins": 0.21940040588378906, "rewards/rejected": -0.4297730624675751, "step": 2678 }, { "epoch": 7.3347022587268995, "grad_norm": 2.73138165473938, "learning_rate": 6.33013698630137e-07, "log_odds_chosen": 2.3993024826049805, "log_odds_ratio": -0.3164789080619812, "logits/chosen": 0.5004944801330566, "logits/rejected": 0.489896297454834, "logps/chosen": -1.8356561660766602, "logps/rejected": -4.126690864562988, "loss": 0.7208, "nll_loss": 0.6891868114471436, "rewards/accuracies": 0.875, "rewards/chosen": -0.18356561660766602, "rewards/margins": 0.2291034609079361, "rewards/rejected": -0.4126690626144409, "step": 2679 }, { "epoch": 7.337440109514032, "grad_norm": 2.8276119232177734, "learning_rate": 6.328767123287671e-07, "log_odds_chosen": 3.429037094116211, "log_odds_ratio": -0.20051923394203186, "logits/chosen": 1.0641494989395142, "logits/rejected": 1.028921365737915, "logps/chosen": -1.8502576351165771, "logps/rejected": -5.099052906036377, "loss": 0.6279, "nll_loss": 0.6078208088874817, "rewards/accuracies": 0.875, "rewards/chosen": -0.18502575159072876, "rewards/margins": 0.32487952709198, "rewards/rejected": -0.5099053382873535, "step": 2680 }, { "epoch": 7.340177960301164, "grad_norm": 3.2292673587799072, "learning_rate": 6.327397260273972e-07, "log_odds_chosen": 2.6860103607177734, "log_odds_ratio": -0.35971909761428833, "logits/chosen": 0.6822937726974487, "logits/rejected": 0.6200828552246094, "logps/chosen": -1.593973159790039, "logps/rejected": -3.9948208332061768, "loss": 0.6186, "nll_loss": 0.5826581120491028, "rewards/accuracies": 0.875, "rewards/chosen": -0.15939731895923615, "rewards/margins": 0.24008476734161377, "rewards/rejected": -0.3994820713996887, "step": 2681 }, { "epoch": 7.342915811088296, "grad_norm": 2.799334764480591, "learning_rate": 6.326027397260274e-07, "log_odds_chosen": 2.389878511428833, "log_odds_ratio": -0.3797116279602051, "logits/chosen": 0.7894183397293091, "logits/rejected": 0.7531654834747314, "logps/chosen": -2.0952773094177246, "logps/rejected": -4.408778190612793, "loss": 0.7167, "nll_loss": 0.6787619590759277, "rewards/accuracies": 0.75, "rewards/chosen": -0.20952773094177246, "rewards/margins": 0.2313501238822937, "rewards/rejected": -0.44087785482406616, "step": 2682 }, { "epoch": 7.345653661875428, "grad_norm": 3.9841387271881104, "learning_rate": 6.324657534246575e-07, "log_odds_chosen": 2.4782257080078125, "log_odds_ratio": -0.2313258945941925, "logits/chosen": 1.0904016494750977, "logits/rejected": 1.1440905332565308, "logps/chosen": -2.0869321823120117, "logps/rejected": -4.444985866546631, "loss": 0.8036, "nll_loss": 0.7804548740386963, "rewards/accuracies": 1.0, "rewards/chosen": -0.2086932361125946, "rewards/margins": 0.23580537736415863, "rewards/rejected": -0.44449862837791443, "step": 2683 }, { "epoch": 7.34839151266256, "grad_norm": 3.197500705718994, "learning_rate": 6.323287671232877e-07, "log_odds_chosen": 1.3505170345306396, "log_odds_ratio": -0.3114973306655884, "logits/chosen": 0.7403810024261475, "logits/rejected": 0.8002928495407104, "logps/chosen": -2.125314235687256, "logps/rejected": -3.368306875228882, "loss": 0.6572, "nll_loss": 0.6260103583335876, "rewards/accuracies": 0.875, "rewards/chosen": -0.2125314325094223, "rewards/margins": 0.12429927289485931, "rewards/rejected": -0.3368307054042816, "step": 2684 }, { "epoch": 7.351129363449692, "grad_norm": 3.470867872238159, "learning_rate": 6.321917808219178e-07, "log_odds_chosen": 3.5680084228515625, "log_odds_ratio": -0.10845039039850235, "logits/chosen": 1.0397882461547852, "logits/rejected": 1.0685588121414185, "logps/chosen": -2.332552433013916, "logps/rejected": -5.742856979370117, "loss": 0.7034, "nll_loss": 0.6925519704818726, "rewards/accuracies": 1.0, "rewards/chosen": -0.23325523734092712, "rewards/margins": 0.34103041887283325, "rewards/rejected": -0.5742856860160828, "step": 2685 }, { "epoch": 7.353867214236824, "grad_norm": 2.946721076965332, "learning_rate": 6.320547945205479e-07, "log_odds_chosen": 1.807313084602356, "log_odds_ratio": -0.2933826744556427, "logits/chosen": 0.872814416885376, "logits/rejected": 0.862572968006134, "logps/chosen": -1.6327182054519653, "logps/rejected": -3.284008502960205, "loss": 0.618, "nll_loss": 0.5886826515197754, "rewards/accuracies": 1.0, "rewards/chosen": -0.16327182948589325, "rewards/margins": 0.16512903571128845, "rewards/rejected": -0.3284008502960205, "step": 2686 }, { "epoch": 7.356605065023956, "grad_norm": 3.606111526489258, "learning_rate": 6.319178082191781e-07, "log_odds_chosen": 2.867690324783325, "log_odds_ratio": -0.18516047298908234, "logits/chosen": 1.0894923210144043, "logits/rejected": 1.1155519485473633, "logps/chosen": -2.1540000438690186, "logps/rejected": -4.794364929199219, "loss": 0.7022, "nll_loss": 0.6836860179901123, "rewards/accuracies": 0.875, "rewards/chosen": -0.21540001034736633, "rewards/margins": 0.26403650641441345, "rewards/rejected": -0.4794365167617798, "step": 2687 }, { "epoch": 7.359342915811088, "grad_norm": 3.411055326461792, "learning_rate": 6.317808219178082e-07, "log_odds_chosen": 2.0770654678344727, "log_odds_ratio": -0.4942479431629181, "logits/chosen": 1.039286732673645, "logits/rejected": 1.032264232635498, "logps/chosen": -2.1733856201171875, "logps/rejected": -4.027416229248047, "loss": 0.7724, "nll_loss": 0.7230088710784912, "rewards/accuracies": 0.75, "rewards/chosen": -0.21733856201171875, "rewards/margins": 0.18540306389331818, "rewards/rejected": -0.4027416408061981, "step": 2688 }, { "epoch": 7.36208076659822, "grad_norm": 8.592611312866211, "learning_rate": 6.316438356164383e-07, "log_odds_chosen": 0.3212294578552246, "log_odds_ratio": -0.7330684661865234, "logits/chosen": 0.8666551113128662, "logits/rejected": 0.8089748024940491, "logps/chosen": -2.4968690872192383, "logps/rejected": -2.7325215339660645, "loss": 0.7426, "nll_loss": 0.6693342924118042, "rewards/accuracies": 0.75, "rewards/chosen": -0.24968691170215607, "rewards/margins": 0.023565255105495453, "rewards/rejected": -0.2732521593570709, "step": 2689 }, { "epoch": 7.364818617385352, "grad_norm": 2.9529809951782227, "learning_rate": 6.315068493150685e-07, "log_odds_chosen": 2.6588547229766846, "log_odds_ratio": -0.16905935108661652, "logits/chosen": 0.9207842946052551, "logits/rejected": 0.9683499336242676, "logps/chosen": -2.252138614654541, "logps/rejected": -4.770310878753662, "loss": 0.6717, "nll_loss": 0.6548014283180237, "rewards/accuracies": 1.0, "rewards/chosen": -0.22521385550498962, "rewards/margins": 0.2518172264099121, "rewards/rejected": -0.4770311117172241, "step": 2690 }, { "epoch": 7.367556468172484, "grad_norm": 3.518022298812866, "learning_rate": 6.313698630136987e-07, "log_odds_chosen": 4.991211891174316, "log_odds_ratio": -0.16411933302879333, "logits/chosen": 1.0603057146072388, "logits/rejected": 1.1123117208480835, "logps/chosen": -3.0320448875427246, "logps/rejected": -7.922496318817139, "loss": 0.676, "nll_loss": 0.659600555896759, "rewards/accuracies": 1.0, "rewards/chosen": -0.3032045364379883, "rewards/margins": 0.4890451431274414, "rewards/rejected": -0.7922496795654297, "step": 2691 }, { "epoch": 7.370294318959616, "grad_norm": 2.795271873474121, "learning_rate": 6.312328767123287e-07, "log_odds_chosen": 2.596991539001465, "log_odds_ratio": -0.15881529450416565, "logits/chosen": 0.8941035270690918, "logits/rejected": 0.8851940631866455, "logps/chosen": -2.022814989089966, "logps/rejected": -4.450102806091309, "loss": 0.6734, "nll_loss": 0.6574701070785522, "rewards/accuracies": 1.0, "rewards/chosen": -0.20228148996829987, "rewards/margins": 0.24272876977920532, "rewards/rejected": -0.44501030445098877, "step": 2692 }, { "epoch": 7.3730321697467485, "grad_norm": 2.9684972763061523, "learning_rate": 6.310958904109589e-07, "log_odds_chosen": 2.138416051864624, "log_odds_ratio": -0.19329948723316193, "logits/chosen": 1.2664923667907715, "logits/rejected": 1.2841771841049194, "logps/chosen": -1.9516088962554932, "logps/rejected": -3.966233015060425, "loss": 0.6213, "nll_loss": 0.6019927859306335, "rewards/accuracies": 1.0, "rewards/chosen": -0.1951608955860138, "rewards/margins": 0.20146241784095764, "rewards/rejected": -0.39662331342697144, "step": 2693 }, { "epoch": 7.3757700205338805, "grad_norm": 2.950798273086548, "learning_rate": 6.30958904109589e-07, "log_odds_chosen": 1.4207996129989624, "log_odds_ratio": -0.3436397314071655, "logits/chosen": 0.7720860242843628, "logits/rejected": 0.6712571382522583, "logps/chosen": -1.495948314666748, "logps/rejected": -2.758391857147217, "loss": 0.6323, "nll_loss": 0.597919225692749, "rewards/accuracies": 0.875, "rewards/chosen": -0.14959484338760376, "rewards/margins": 0.12624435126781464, "rewards/rejected": -0.2758391797542572, "step": 2694 }, { "epoch": 7.378507871321013, "grad_norm": 2.8847157955169678, "learning_rate": 6.308219178082191e-07, "log_odds_chosen": 1.4199624061584473, "log_odds_ratio": -0.3400249481201172, "logits/chosen": 0.6419512629508972, "logits/rejected": 0.6253001689910889, "logps/chosen": -1.56190025806427, "logps/rejected": -2.7808470726013184, "loss": 0.6538, "nll_loss": 0.6198161244392395, "rewards/accuracies": 1.0, "rewards/chosen": -0.15619003772735596, "rewards/margins": 0.12189467996358871, "rewards/rejected": -0.27808472514152527, "step": 2695 }, { "epoch": 7.381245722108146, "grad_norm": 2.4580929279327393, "learning_rate": 6.306849315068493e-07, "log_odds_chosen": 2.3678388595581055, "log_odds_ratio": -0.15721683204174042, "logits/chosen": 1.047410488128662, "logits/rejected": 1.0508928298950195, "logps/chosen": -1.5697598457336426, "logps/rejected": -3.742506504058838, "loss": 0.5959, "nll_loss": 0.5801879167556763, "rewards/accuracies": 1.0, "rewards/chosen": -0.15697599947452545, "rewards/margins": 0.21727466583251953, "rewards/rejected": -0.3742506504058838, "step": 2696 }, { "epoch": 7.383983572895277, "grad_norm": 2.76198148727417, "learning_rate": 6.305479452054794e-07, "log_odds_chosen": 3.8484854698181152, "log_odds_ratio": -0.14053332805633545, "logits/chosen": 0.7752537727355957, "logits/rejected": 0.78629070520401, "logps/chosen": -1.8610460758209229, "logps/rejected": -5.5346269607543945, "loss": 0.7722, "nll_loss": 0.7581461668014526, "rewards/accuracies": 1.0, "rewards/chosen": -0.18610461056232452, "rewards/margins": 0.36735808849334717, "rewards/rejected": -0.5534626841545105, "step": 2697 }, { "epoch": 7.38672142368241, "grad_norm": 3.3986141681671143, "learning_rate": 6.304109589041096e-07, "log_odds_chosen": 1.7159984111785889, "log_odds_ratio": -0.3441661596298218, "logits/chosen": 0.6126942038536072, "logits/rejected": 0.5469658970832825, "logps/chosen": -1.8343772888183594, "logps/rejected": -3.4277353286743164, "loss": 0.6755, "nll_loss": 0.6410926580429077, "rewards/accuracies": 0.75, "rewards/chosen": -0.18343773484230042, "rewards/margins": 0.15933577716350555, "rewards/rejected": -0.34277352690696716, "step": 2698 }, { "epoch": 7.389459274469542, "grad_norm": 7.838392734527588, "learning_rate": 6.302739726027397e-07, "log_odds_chosen": 1.280679702758789, "log_odds_ratio": -0.5285263657569885, "logits/chosen": 0.7976348996162415, "logits/rejected": 0.7857147455215454, "logps/chosen": -2.3722729682922363, "logps/rejected": -3.518846273422241, "loss": 0.738, "nll_loss": 0.6851813197135925, "rewards/accuracies": 0.75, "rewards/chosen": -0.23722730576992035, "rewards/margins": 0.11465732753276825, "rewards/rejected": -0.3518846035003662, "step": 2699 }, { "epoch": 7.392197125256674, "grad_norm": 3.2887115478515625, "learning_rate": 6.301369863013698e-07, "log_odds_chosen": 2.469209671020508, "log_odds_ratio": -0.17994612455368042, "logits/chosen": 0.6628448367118835, "logits/rejected": 0.6927108764648438, "logps/chosen": -1.594313383102417, "logps/rejected": -3.7927374839782715, "loss": 0.6013, "nll_loss": 0.5832644701004028, "rewards/accuracies": 1.0, "rewards/chosen": -0.1594313383102417, "rewards/margins": 0.21984240412712097, "rewards/rejected": -0.37927374243736267, "step": 2700 }, { "epoch": 7.394934976043806, "grad_norm": 6.679412841796875, "learning_rate": 6.3e-07, "log_odds_chosen": 0.5845957398414612, "log_odds_ratio": -0.9129875302314758, "logits/chosen": 0.785277247428894, "logits/rejected": 0.7596580386161804, "logps/chosen": -2.021204948425293, "logps/rejected": -2.4991791248321533, "loss": 0.8336, "nll_loss": 0.7423402667045593, "rewards/accuracies": 0.75, "rewards/chosen": -0.20212051272392273, "rewards/margins": 0.04779740795493126, "rewards/rejected": -0.2499178946018219, "step": 2701 }, { "epoch": 7.397672826830938, "grad_norm": 3.003371238708496, "learning_rate": 6.298630136986301e-07, "log_odds_chosen": 2.87904691696167, "log_odds_ratio": -0.13569916784763336, "logits/chosen": 1.0901033878326416, "logits/rejected": 1.1040410995483398, "logps/chosen": -2.0651142597198486, "logps/rejected": -4.781993865966797, "loss": 0.6933, "nll_loss": 0.6797267198562622, "rewards/accuracies": 1.0, "rewards/chosen": -0.20651143789291382, "rewards/margins": 0.27168798446655273, "rewards/rejected": -0.47819939255714417, "step": 2702 }, { "epoch": 7.40041067761807, "grad_norm": 5.654118061065674, "learning_rate": 6.297260273972602e-07, "log_odds_chosen": 1.067535400390625, "log_odds_ratio": -0.5101763010025024, "logits/chosen": 0.7388923764228821, "logits/rejected": 0.6737388968467712, "logps/chosen": -2.1627402305603027, "logps/rejected": -3.060636520385742, "loss": 0.8179, "nll_loss": 0.7668359279632568, "rewards/accuracies": 0.875, "rewards/chosen": -0.21627402305603027, "rewards/margins": 0.08978960663080215, "rewards/rejected": -0.3060636520385742, "step": 2703 }, { "epoch": 7.403148528405202, "grad_norm": 3.0116052627563477, "learning_rate": 6.295890410958904e-07, "log_odds_chosen": 3.7788639068603516, "log_odds_ratio": -0.09754819422960281, "logits/chosen": 1.175951600074768, "logits/rejected": 1.2005820274353027, "logps/chosen": -2.9836206436157227, "logps/rejected": -6.67506217956543, "loss": 0.7729, "nll_loss": 0.7631522417068481, "rewards/accuracies": 1.0, "rewards/chosen": -0.2983620762825012, "rewards/margins": 0.3691442310810089, "rewards/rejected": -0.667506217956543, "step": 2704 }, { "epoch": 7.405886379192334, "grad_norm": 3.2091526985168457, "learning_rate": 6.294520547945206e-07, "log_odds_chosen": 2.777193546295166, "log_odds_ratio": -0.3798038363456726, "logits/chosen": 0.7495467662811279, "logits/rejected": 0.7435716986656189, "logps/chosen": -1.6318354606628418, "logps/rejected": -4.1972246170043945, "loss": 0.6889, "nll_loss": 0.6508990526199341, "rewards/accuracies": 0.875, "rewards/chosen": -0.1631835401058197, "rewards/margins": 0.2565389573574066, "rewards/rejected": -0.4197224974632263, "step": 2705 }, { "epoch": 7.408624229979466, "grad_norm": 4.175766944885254, "learning_rate": 6.293150684931506e-07, "log_odds_chosen": 1.8840372562408447, "log_odds_ratio": -0.3530096113681793, "logits/chosen": 0.6884036660194397, "logits/rejected": 0.6799624562263489, "logps/chosen": -1.7290363311767578, "logps/rejected": -3.385744571685791, "loss": 0.6819, "nll_loss": 0.6465637683868408, "rewards/accuracies": 0.875, "rewards/chosen": -0.1729036271572113, "rewards/margins": 0.16567082703113556, "rewards/rejected": -0.33857443928718567, "step": 2706 }, { "epoch": 7.411362080766598, "grad_norm": 3.1878983974456787, "learning_rate": 6.291780821917808e-07, "log_odds_chosen": 2.016507625579834, "log_odds_ratio": -0.22118699550628662, "logits/chosen": 1.2985007762908936, "logits/rejected": 1.3298319578170776, "logps/chosen": -1.989417552947998, "logps/rejected": -3.9083919525146484, "loss": 0.6024, "nll_loss": 0.5803066492080688, "rewards/accuracies": 1.0, "rewards/chosen": -0.19894175231456757, "rewards/margins": 0.1918974220752716, "rewards/rejected": -0.39083918929100037, "step": 2707 }, { "epoch": 7.41409993155373, "grad_norm": 2.5805258750915527, "learning_rate": 6.290410958904109e-07, "log_odds_chosen": 3.0800883769989014, "log_odds_ratio": -0.20157578587532043, "logits/chosen": 0.7618658542633057, "logits/rejected": 0.7855392694473267, "logps/chosen": -1.5485281944274902, "logps/rejected": -4.449185371398926, "loss": 0.6329, "nll_loss": 0.6127023696899414, "rewards/accuracies": 1.0, "rewards/chosen": -0.15485282242298126, "rewards/margins": 0.2900657057762146, "rewards/rejected": -0.44491854310035706, "step": 2708 }, { "epoch": 7.416837782340862, "grad_norm": 2.836207628250122, "learning_rate": 6.28904109589041e-07, "log_odds_chosen": 3.0701820850372314, "log_odds_ratio": -0.19529585540294647, "logits/chosen": 0.9733434915542603, "logits/rejected": 0.9607568979263306, "logps/chosen": -1.6470211744308472, "logps/rejected": -4.509012222290039, "loss": 0.6688, "nll_loss": 0.6492255926132202, "rewards/accuracies": 0.875, "rewards/chosen": -0.16470211744308472, "rewards/margins": 0.28619909286499023, "rewards/rejected": -0.45090124011039734, "step": 2709 }, { "epoch": 7.4195756331279945, "grad_norm": 3.7471511363983154, "learning_rate": 6.287671232876712e-07, "log_odds_chosen": 2.295980453491211, "log_odds_ratio": -0.41991424560546875, "logits/chosen": 0.9819088578224182, "logits/rejected": 1.0154117345809937, "logps/chosen": -2.711531639099121, "logps/rejected": -4.903894901275635, "loss": 0.7717, "nll_loss": 0.7297059297561646, "rewards/accuracies": 0.875, "rewards/chosen": -0.27115315198898315, "rewards/margins": 0.21923628449440002, "rewards/rejected": -0.49038946628570557, "step": 2710 }, { "epoch": 7.422313483915127, "grad_norm": 4.313812732696533, "learning_rate": 6.286301369863013e-07, "log_odds_chosen": 2.2084405422210693, "log_odds_ratio": -0.6813858151435852, "logits/chosen": 0.8548213243484497, "logits/rejected": 0.8620378971099854, "logps/chosen": -2.738447666168213, "logps/rejected": -4.801266193389893, "loss": 0.7647, "nll_loss": 0.69659423828125, "rewards/accuracies": 0.875, "rewards/chosen": -0.27384477853775024, "rewards/margins": 0.206281840801239, "rewards/rejected": -0.48012661933898926, "step": 2711 }, { "epoch": 7.425051334702259, "grad_norm": 3.469261646270752, "learning_rate": 6.284931506849315e-07, "log_odds_chosen": 1.6757723093032837, "log_odds_ratio": -0.3653067350387573, "logits/chosen": 0.7471345067024231, "logits/rejected": 0.7200276851654053, "logps/chosen": -1.760176420211792, "logps/rejected": -3.347367763519287, "loss": 0.6656, "nll_loss": 0.629117488861084, "rewards/accuracies": 0.875, "rewards/chosen": -0.176017627120018, "rewards/margins": 0.15871913731098175, "rewards/rejected": -0.33473676443099976, "step": 2712 }, { "epoch": 7.427789185489391, "grad_norm": 5.377763748168945, "learning_rate": 6.283561643835616e-07, "log_odds_chosen": 0.8985546827316284, "log_odds_ratio": -0.7262334227561951, "logits/chosen": 0.7854241132736206, "logits/rejected": 0.7430839538574219, "logps/chosen": -2.1490180492401123, "logps/rejected": -2.876037120819092, "loss": 0.8192, "nll_loss": 0.7465313076972961, "rewards/accuracies": 0.75, "rewards/chosen": -0.21490181982517242, "rewards/margins": 0.07270187884569168, "rewards/rejected": -0.2876037061214447, "step": 2713 }, { "epoch": 7.430527036276523, "grad_norm": 2.9945144653320312, "learning_rate": 6.282191780821917e-07, "log_odds_chosen": 1.4147472381591797, "log_odds_ratio": -0.29308781027793884, "logits/chosen": 0.7493217587471008, "logits/rejected": 0.6807427406311035, "logps/chosen": -1.7947605848312378, "logps/rejected": -3.0620474815368652, "loss": 0.6312, "nll_loss": 0.601933479309082, "rewards/accuracies": 1.0, "rewards/chosen": -0.1794760525226593, "rewards/margins": 0.12672872841358185, "rewards/rejected": -0.30620479583740234, "step": 2714 }, { "epoch": 7.433264887063655, "grad_norm": 3.250591278076172, "learning_rate": 6.280821917808219e-07, "log_odds_chosen": 3.1769189834594727, "log_odds_ratio": -0.18510554730892181, "logits/chosen": 0.9593257308006287, "logits/rejected": 0.9511274099349976, "logps/chosen": -2.1253538131713867, "logps/rejected": -5.133152008056641, "loss": 0.7317, "nll_loss": 0.7131984829902649, "rewards/accuracies": 1.0, "rewards/chosen": -0.21253538131713867, "rewards/margins": 0.300779789686203, "rewards/rejected": -0.5133152008056641, "step": 2715 }, { "epoch": 7.436002737850787, "grad_norm": 3.6792683601379395, "learning_rate": 6.27945205479452e-07, "log_odds_chosen": 2.200366973876953, "log_odds_ratio": -0.26353245973587036, "logits/chosen": 1.091277837753296, "logits/rejected": 1.1090595722198486, "logps/chosen": -2.1711649894714355, "logps/rejected": -4.284585475921631, "loss": 0.6844, "nll_loss": 0.6580654978752136, "rewards/accuracies": 1.0, "rewards/chosen": -0.21711649000644684, "rewards/margins": 0.21134206652641296, "rewards/rejected": -0.428458571434021, "step": 2716 }, { "epoch": 7.438740588637919, "grad_norm": 5.224982261657715, "learning_rate": 6.278082191780821e-07, "log_odds_chosen": 2.3264153003692627, "log_odds_ratio": -0.8155341744422913, "logits/chosen": 0.6574527025222778, "logits/rejected": 0.6800081729888916, "logps/chosen": -2.4299466609954834, "logps/rejected": -4.6612443923950195, "loss": 0.9545, "nll_loss": 0.8729177713394165, "rewards/accuracies": 0.5, "rewards/chosen": -0.24299466609954834, "rewards/margins": 0.22312979400157928, "rewards/rejected": -0.4661244750022888, "step": 2717 }, { "epoch": 7.441478439425051, "grad_norm": 4.215110778808594, "learning_rate": 6.276712328767123e-07, "log_odds_chosen": 3.338942050933838, "log_odds_ratio": -0.2986948490142822, "logits/chosen": 0.9162731766700745, "logits/rejected": 0.8727264404296875, "logps/chosen": -2.1451756954193115, "logps/rejected": -5.310097694396973, "loss": 0.7078, "nll_loss": 0.6778990030288696, "rewards/accuracies": 0.875, "rewards/chosen": -0.21451756358146667, "rewards/margins": 0.3164921998977661, "rewards/rejected": -0.5310097932815552, "step": 2718 }, { "epoch": 7.444216290212183, "grad_norm": 3.4820775985717773, "learning_rate": 6.275342465753425e-07, "log_odds_chosen": 1.0957542657852173, "log_odds_ratio": -0.3390544056892395, "logits/chosen": 0.7527744174003601, "logits/rejected": 0.7708990573883057, "logps/chosen": -2.552000045776367, "logps/rejected": -3.566124439239502, "loss": 0.6995, "nll_loss": 0.665591835975647, "rewards/accuracies": 1.0, "rewards/chosen": -0.25519999861717224, "rewards/margins": 0.10141243040561676, "rewards/rejected": -0.3566124141216278, "step": 2719 }, { "epoch": 7.446954140999315, "grad_norm": 3.3245906829833984, "learning_rate": 6.273972602739725e-07, "log_odds_chosen": 1.6608469486236572, "log_odds_ratio": -0.35929054021835327, "logits/chosen": 0.5583723783493042, "logits/rejected": 0.5161128640174866, "logps/chosen": -1.6450395584106445, "logps/rejected": -3.151778221130371, "loss": 0.6912, "nll_loss": 0.6552624702453613, "rewards/accuracies": 0.875, "rewards/chosen": -0.16450396180152893, "rewards/margins": 0.15067386627197266, "rewards/rejected": -0.3151778280735016, "step": 2720 }, { "epoch": 7.449691991786447, "grad_norm": 3.537668466567993, "learning_rate": 6.272602739726027e-07, "log_odds_chosen": 1.529565453529358, "log_odds_ratio": -0.714474081993103, "logits/chosen": 0.5806872248649597, "logits/rejected": 0.6303545236587524, "logps/chosen": -2.0329203605651855, "logps/rejected": -3.461071014404297, "loss": 0.6889, "nll_loss": 0.617487370967865, "rewards/accuracies": 0.75, "rewards/chosen": -0.20329204201698303, "rewards/margins": 0.14281505346298218, "rewards/rejected": -0.3461071252822876, "step": 2721 }, { "epoch": 7.452429842573579, "grad_norm": 7.152990341186523, "learning_rate": 6.271232876712329e-07, "log_odds_chosen": 1.495955467224121, "log_odds_ratio": -0.5385549068450928, "logits/chosen": 1.1661796569824219, "logits/rejected": 1.156496524810791, "logps/chosen": -3.1005542278289795, "logps/rejected": -4.478128910064697, "loss": 0.7677, "nll_loss": 0.7138110995292664, "rewards/accuracies": 0.75, "rewards/chosen": -0.3100554347038269, "rewards/margins": 0.13775745034217834, "rewards/rejected": -0.44781288504600525, "step": 2722 }, { "epoch": 7.455167693360712, "grad_norm": 4.972532272338867, "learning_rate": 6.269863013698629e-07, "log_odds_chosen": 2.1879992485046387, "log_odds_ratio": -0.3154832720756531, "logits/chosen": 1.1182141304016113, "logits/rejected": 1.0642387866973877, "logps/chosen": -2.2382500171661377, "logps/rejected": -4.335575103759766, "loss": 0.7665, "nll_loss": 0.7349165678024292, "rewards/accuracies": 0.875, "rewards/chosen": -0.22382502257823944, "rewards/margins": 0.2097325176000595, "rewards/rejected": -0.43355754017829895, "step": 2723 }, { "epoch": 7.457905544147844, "grad_norm": 3.210014820098877, "learning_rate": 6.268493150684931e-07, "log_odds_chosen": 1.4110522270202637, "log_odds_ratio": -0.2997114062309265, "logits/chosen": 0.9405450224876404, "logits/rejected": 0.8909765481948853, "logps/chosen": -1.199364185333252, "logps/rejected": -2.3265843391418457, "loss": 0.6851, "nll_loss": 0.6550988554954529, "rewards/accuracies": 0.875, "rewards/chosen": -0.11993641406297684, "rewards/margins": 0.11272203177213669, "rewards/rejected": -0.23265846073627472, "step": 2724 }, { "epoch": 7.460643394934976, "grad_norm": 7.0774383544921875, "learning_rate": 6.267123287671232e-07, "log_odds_chosen": 1.462947130203247, "log_odds_ratio": -0.5188605785369873, "logits/chosen": 1.007178544998169, "logits/rejected": 0.9860224723815918, "logps/chosen": -2.3268723487854004, "logps/rejected": -3.67394757270813, "loss": 0.7164, "nll_loss": 0.6645457744598389, "rewards/accuracies": 0.875, "rewards/chosen": -0.23268723487854004, "rewards/margins": 0.13470754027366638, "rewards/rejected": -0.36739474534988403, "step": 2725 }, { "epoch": 7.4633812457221085, "grad_norm": 2.9527313709259033, "learning_rate": 6.265753424657534e-07, "log_odds_chosen": 1.9762122631072998, "log_odds_ratio": -0.21972671151161194, "logits/chosen": 0.9270064830780029, "logits/rejected": 0.8120443224906921, "logps/chosen": -1.387700080871582, "logps/rejected": -3.1216611862182617, "loss": 0.6564, "nll_loss": 0.634399950504303, "rewards/accuracies": 1.0, "rewards/chosen": -0.13877001404762268, "rewards/margins": 0.17339612543582916, "rewards/rejected": -0.31216612458229065, "step": 2726 }, { "epoch": 7.4661190965092405, "grad_norm": 7.273373126983643, "learning_rate": 6.264383561643835e-07, "log_odds_chosen": 2.5494351387023926, "log_odds_ratio": -0.22448670864105225, "logits/chosen": 0.9853191375732422, "logits/rejected": 0.938707709312439, "logps/chosen": -1.8191642761230469, "logps/rejected": -4.160165786743164, "loss": 0.7454, "nll_loss": 0.7229852080345154, "rewards/accuracies": 1.0, "rewards/chosen": -0.18191641569137573, "rewards/margins": 0.2341001331806183, "rewards/rejected": -0.4160165786743164, "step": 2727 }, { "epoch": 7.468856947296373, "grad_norm": 5.537271022796631, "learning_rate": 6.263013698630136e-07, "log_odds_chosen": 2.218919038772583, "log_odds_ratio": -0.2616910934448242, "logits/chosen": 0.8586046695709229, "logits/rejected": 0.8139891624450684, "logps/chosen": -1.9828391075134277, "logps/rejected": -3.9201173782348633, "loss": 0.7177, "nll_loss": 0.6915192604064941, "rewards/accuracies": 0.875, "rewards/chosen": -0.19828391075134277, "rewards/margins": 0.19372783601284027, "rewards/rejected": -0.39201176166534424, "step": 2728 }, { "epoch": 7.471594798083505, "grad_norm": 4.343054294586182, "learning_rate": 6.261643835616438e-07, "log_odds_chosen": 1.794489860534668, "log_odds_ratio": -0.40533313155174255, "logits/chosen": 0.48010388016700745, "logits/rejected": 0.4395746886730194, "logps/chosen": -2.364640474319458, "logps/rejected": -3.997591018676758, "loss": 0.7108, "nll_loss": 0.6702467799186707, "rewards/accuracies": 0.75, "rewards/chosen": -0.23646405339241028, "rewards/margins": 0.16329509019851685, "rewards/rejected": -0.39975911378860474, "step": 2729 }, { "epoch": 7.474332648870637, "grad_norm": 3.0798768997192383, "learning_rate": 6.260273972602739e-07, "log_odds_chosen": 2.710153818130493, "log_odds_ratio": -0.17120414972305298, "logits/chosen": 0.9391285181045532, "logits/rejected": 0.9290008544921875, "logps/chosen": -1.3193600177764893, "logps/rejected": -3.686601400375366, "loss": 0.6063, "nll_loss": 0.589205265045166, "rewards/accuracies": 1.0, "rewards/chosen": -0.13193601369857788, "rewards/margins": 0.23672416806221008, "rewards/rejected": -0.36866018176078796, "step": 2730 }, { "epoch": 7.477070499657769, "grad_norm": 2.917034149169922, "learning_rate": 6.25890410958904e-07, "log_odds_chosen": 2.5006563663482666, "log_odds_ratio": -0.1917957067489624, "logits/chosen": 1.0223968029022217, "logits/rejected": 1.0295186042785645, "logps/chosen": -1.4271597862243652, "logps/rejected": -3.665687084197998, "loss": 0.6002, "nll_loss": 0.5810561180114746, "rewards/accuracies": 0.875, "rewards/chosen": -0.1427159607410431, "rewards/margins": 0.2238527536392212, "rewards/rejected": -0.36656874418258667, "step": 2731 }, { "epoch": 7.479808350444901, "grad_norm": 3.204500675201416, "learning_rate": 6.257534246575342e-07, "log_odds_chosen": 3.8046793937683105, "log_odds_ratio": -0.20614556968212128, "logits/chosen": 0.8625819683074951, "logits/rejected": 0.8661024570465088, "logps/chosen": -2.9554319381713867, "logps/rejected": -6.618419647216797, "loss": 0.8041, "nll_loss": 0.7834453582763672, "rewards/accuracies": 0.875, "rewards/chosen": -0.29554319381713867, "rewards/margins": 0.36629876494407654, "rewards/rejected": -0.6618419885635376, "step": 2732 }, { "epoch": 7.482546201232033, "grad_norm": 3.105576276779175, "learning_rate": 6.256164383561644e-07, "log_odds_chosen": 1.3318727016448975, "log_odds_ratio": -0.38143977522850037, "logits/chosen": 0.7427699565887451, "logits/rejected": 0.7452836632728577, "logps/chosen": -1.6140241622924805, "logps/rejected": -2.7280778884887695, "loss": 0.6522, "nll_loss": 0.6140927076339722, "rewards/accuracies": 0.875, "rewards/chosen": -0.16140241920948029, "rewards/margins": 0.1114053726196289, "rewards/rejected": -0.272807776927948, "step": 2733 }, { "epoch": 7.485284052019165, "grad_norm": 3.1012184619903564, "learning_rate": 6.254794520547944e-07, "log_odds_chosen": 1.6522139310836792, "log_odds_ratio": -0.24794575572013855, "logits/chosen": 0.666473388671875, "logits/rejected": 0.6516934037208557, "logps/chosen": -1.2054885625839233, "logps/rejected": -2.5572054386138916, "loss": 0.6352, "nll_loss": 0.6103787422180176, "rewards/accuracies": 1.0, "rewards/chosen": -0.12054885178804398, "rewards/margins": 0.13517169654369354, "rewards/rejected": -0.2557205557823181, "step": 2734 }, { "epoch": 7.488021902806297, "grad_norm": 3.3025455474853516, "learning_rate": 6.253424657534246e-07, "log_odds_chosen": 1.82289457321167, "log_odds_ratio": -0.27880269289016724, "logits/chosen": 0.6478798985481262, "logits/rejected": 0.6169577240943909, "logps/chosen": -1.3620984554290771, "logps/rejected": -2.964712619781494, "loss": 0.6604, "nll_loss": 0.6325134038925171, "rewards/accuracies": 1.0, "rewards/chosen": -0.1362098604440689, "rewards/margins": 0.1602613925933838, "rewards/rejected": -0.2964712381362915, "step": 2735 }, { "epoch": 7.490759753593429, "grad_norm": 3.2398762702941895, "learning_rate": 6.252054794520548e-07, "log_odds_chosen": 4.691068649291992, "log_odds_ratio": -0.14950573444366455, "logits/chosen": 1.0843168497085571, "logits/rejected": 1.0971134901046753, "logps/chosen": -2.2882657051086426, "logps/rejected": -6.8762288093566895, "loss": 0.8294, "nll_loss": 0.8144866824150085, "rewards/accuracies": 1.0, "rewards/chosen": -0.2288265973329544, "rewards/margins": 0.45879629254341125, "rewards/rejected": -0.6876229047775269, "step": 2736 }, { "epoch": 7.493497604380561, "grad_norm": 5.138904094696045, "learning_rate": 6.250684931506848e-07, "log_odds_chosen": 1.5665634870529175, "log_odds_ratio": -0.4225601553916931, "logits/chosen": 0.723829984664917, "logits/rejected": 0.7023506760597229, "logps/chosen": -2.548935890197754, "logps/rejected": -3.9326841831207275, "loss": 0.8176, "nll_loss": 0.775302529335022, "rewards/accuracies": 0.875, "rewards/chosen": -0.25489360094070435, "rewards/margins": 0.13837483525276184, "rewards/rejected": -0.3932684361934662, "step": 2737 }, { "epoch": 7.496235455167693, "grad_norm": 6.440828800201416, "learning_rate": 6.24931506849315e-07, "log_odds_chosen": 2.9755804538726807, "log_odds_ratio": -0.4767962694168091, "logits/chosen": 0.9318028092384338, "logits/rejected": 0.9240943789482117, "logps/chosen": -2.5228796005249023, "logps/rejected": -5.389113903045654, "loss": 0.7419, "nll_loss": 0.6942461729049683, "rewards/accuracies": 0.875, "rewards/chosen": -0.25228798389434814, "rewards/margins": 0.28662344813346863, "rewards/rejected": -0.5389114022254944, "step": 2738 }, { "epoch": 7.498973305954825, "grad_norm": 3.083935260772705, "learning_rate": 6.247945205479451e-07, "log_odds_chosen": 1.7943918704986572, "log_odds_ratio": -0.3175625801086426, "logits/chosen": 0.6304791569709778, "logits/rejected": 0.6031202673912048, "logps/chosen": -2.0906970500946045, "logps/rejected": -3.7334296703338623, "loss": 0.6953, "nll_loss": 0.6635869145393372, "rewards/accuracies": 0.875, "rewards/chosen": -0.20906971395015717, "rewards/margins": 0.16427326202392578, "rewards/rejected": -0.37334299087524414, "step": 2739 }, { "epoch": 7.501711156741957, "grad_norm": 3.4438693523406982, "learning_rate": 6.246575342465754e-07, "log_odds_chosen": 1.342822551727295, "log_odds_ratio": -0.3187955617904663, "logits/chosen": 0.9062007665634155, "logits/rejected": 0.9131510257720947, "logps/chosen": -3.2308759689331055, "logps/rejected": -4.477513790130615, "loss": 0.7308, "nll_loss": 0.6989061832427979, "rewards/accuracies": 0.875, "rewards/chosen": -0.3230876326560974, "rewards/margins": 0.12466380000114441, "rewards/rejected": -0.44775140285491943, "step": 2740 }, { "epoch": 7.5044490075290895, "grad_norm": 3.2243199348449707, "learning_rate": 6.245205479452055e-07, "log_odds_chosen": 2.8264307975769043, "log_odds_ratio": -0.2257869690656662, "logits/chosen": 1.02602219581604, "logits/rejected": 1.0708606243133545, "logps/chosen": -2.9025986194610596, "logps/rejected": -5.671881675720215, "loss": 0.6993, "nll_loss": 0.6767356395721436, "rewards/accuracies": 0.875, "rewards/chosen": -0.29025986790657043, "rewards/margins": 0.27692827582359314, "rewards/rejected": -0.5671881437301636, "step": 2741 }, { "epoch": 7.507186858316222, "grad_norm": 2.9541845321655273, "learning_rate": 6.243835616438356e-07, "log_odds_chosen": 1.8430315256118774, "log_odds_ratio": -0.29525026679039, "logits/chosen": 0.6788783073425293, "logits/rejected": 0.5999413728713989, "logps/chosen": -1.7763268947601318, "logps/rejected": -3.4875576496124268, "loss": 0.7226, "nll_loss": 0.6930448412895203, "rewards/accuracies": 0.875, "rewards/chosen": -0.17763270437717438, "rewards/margins": 0.17112307250499725, "rewards/rejected": -0.34875577688217163, "step": 2742 }, { "epoch": 7.509924709103354, "grad_norm": 3.146899700164795, "learning_rate": 6.242465753424658e-07, "log_odds_chosen": 3.161381721496582, "log_odds_ratio": -0.21113920211791992, "logits/chosen": 0.7042866945266724, "logits/rejected": 0.6942260265350342, "logps/chosen": -1.7459481954574585, "logps/rejected": -4.6591033935546875, "loss": 0.655, "nll_loss": 0.6339176893234253, "rewards/accuracies": 1.0, "rewards/chosen": -0.17459481954574585, "rewards/margins": 0.291315495967865, "rewards/rejected": -0.46591031551361084, "step": 2743 }, { "epoch": 7.512662559890486, "grad_norm": 3.000964879989624, "learning_rate": 6.241095890410959e-07, "log_odds_chosen": 4.111701011657715, "log_odds_ratio": -0.05379393696784973, "logits/chosen": 0.8586580753326416, "logits/rejected": 0.8781516551971436, "logps/chosen": -2.112766742706299, "logps/rejected": -6.019460678100586, "loss": 0.7648, "nll_loss": 0.7594393491744995, "rewards/accuracies": 1.0, "rewards/chosen": -0.21127668023109436, "rewards/margins": 0.39066940546035767, "rewards/rejected": -0.6019460558891296, "step": 2744 }, { "epoch": 7.515400410677618, "grad_norm": 3.5384585857391357, "learning_rate": 6.23972602739726e-07, "log_odds_chosen": 1.7776843309402466, "log_odds_ratio": -0.462531715631485, "logits/chosen": 0.7231273651123047, "logits/rejected": 0.7119088768959045, "logps/chosen": -3.007012367248535, "logps/rejected": -4.680289268493652, "loss": 0.7011, "nll_loss": 0.6548454761505127, "rewards/accuracies": 0.875, "rewards/chosen": -0.3007012605667114, "rewards/margins": 0.16732768714427948, "rewards/rejected": -0.4680289626121521, "step": 2745 }, { "epoch": 7.51813826146475, "grad_norm": 4.002753734588623, "learning_rate": 6.238356164383562e-07, "log_odds_chosen": 0.33131980895996094, "log_odds_ratio": -0.7030779719352722, "logits/chosen": 0.9332084655761719, "logits/rejected": 0.9190674424171448, "logps/chosen": -1.8595432043075562, "logps/rejected": -2.0660407543182373, "loss": 0.6573, "nll_loss": 0.5869748592376709, "rewards/accuracies": 0.75, "rewards/chosen": -0.18595433235168457, "rewards/margins": 0.0206497460603714, "rewards/rejected": -0.20660407841205597, "step": 2746 }, { "epoch": 7.520876112251882, "grad_norm": 3.1203038692474365, "learning_rate": 6.236986301369864e-07, "log_odds_chosen": 2.1051015853881836, "log_odds_ratio": -0.22604002058506012, "logits/chosen": 1.0635786056518555, "logits/rejected": 1.0802884101867676, "logps/chosen": -1.7150402069091797, "logps/rejected": -3.642637014389038, "loss": 0.6399, "nll_loss": 0.6172900199890137, "rewards/accuracies": 1.0, "rewards/chosen": -0.17150402069091797, "rewards/margins": 0.1927596926689148, "rewards/rejected": -0.3642636835575104, "step": 2747 }, { "epoch": 7.523613963039015, "grad_norm": 3.225771427154541, "learning_rate": 6.235616438356164e-07, "log_odds_chosen": 2.4125280380249023, "log_odds_ratio": -0.2470860332250595, "logits/chosen": 0.7146941423416138, "logits/rejected": 0.7042964696884155, "logps/chosen": -2.0508458614349365, "logps/rejected": -4.3322553634643555, "loss": 0.6893, "nll_loss": 0.664557933807373, "rewards/accuracies": 1.0, "rewards/chosen": -0.20508459210395813, "rewards/margins": 0.22814098000526428, "rewards/rejected": -0.4332255721092224, "step": 2748 }, { "epoch": 7.526351813826146, "grad_norm": 3.334751844406128, "learning_rate": 6.234246575342466e-07, "log_odds_chosen": 1.5883030891418457, "log_odds_ratio": -0.26860684156417847, "logits/chosen": 0.970252275466919, "logits/rejected": 0.9667869210243225, "logps/chosen": -2.548280954360962, "logps/rejected": -4.041424751281738, "loss": 0.6881, "nll_loss": 0.6612788438796997, "rewards/accuracies": 1.0, "rewards/chosen": -0.2548281252384186, "rewards/margins": 0.14931432902812958, "rewards/rejected": -0.40414243936538696, "step": 2749 }, { "epoch": 7.529089664613279, "grad_norm": 5.089200496673584, "learning_rate": 6.232876712328768e-07, "log_odds_chosen": 3.057751417160034, "log_odds_ratio": -0.2795819938182831, "logits/chosen": 0.9173699617385864, "logits/rejected": 0.8986942768096924, "logps/chosen": -2.4117462635040283, "logps/rejected": -5.3360443115234375, "loss": 0.6652, "nll_loss": 0.637195348739624, "rewards/accuracies": 0.75, "rewards/chosen": -0.2411746382713318, "rewards/margins": 0.2924298346042633, "rewards/rejected": -0.5336044430732727, "step": 2750 }, { "epoch": 7.531827515400411, "grad_norm": 2.7079596519470215, "learning_rate": 6.231506849315068e-07, "log_odds_chosen": 2.209277868270874, "log_odds_ratio": -0.21192316710948944, "logits/chosen": 0.9451814889907837, "logits/rejected": 0.9466855525970459, "logps/chosen": -2.0581448078155518, "logps/rejected": -4.161746978759766, "loss": 0.6343, "nll_loss": 0.6130916476249695, "rewards/accuracies": 0.875, "rewards/chosen": -0.20581448078155518, "rewards/margins": 0.21036022901535034, "rewards/rejected": -0.4161747097969055, "step": 2751 }, { "epoch": 7.534565366187543, "grad_norm": 3.37028169631958, "learning_rate": 6.23013698630137e-07, "log_odds_chosen": 0.9562462568283081, "log_odds_ratio": -0.5655496716499329, "logits/chosen": 0.31473493576049805, "logits/rejected": 0.3459734320640564, "logps/chosen": -1.5754048824310303, "logps/rejected": -2.4490084648132324, "loss": 0.7402, "nll_loss": 0.6836156249046326, "rewards/accuracies": 0.75, "rewards/chosen": -0.15754050016403198, "rewards/margins": 0.08736033737659454, "rewards/rejected": -0.24490082263946533, "step": 2752 }, { "epoch": 7.537303216974675, "grad_norm": 3.165006399154663, "learning_rate": 6.228767123287671e-07, "log_odds_chosen": 2.33984112739563, "log_odds_ratio": -0.24453945457935333, "logits/chosen": 0.851664662361145, "logits/rejected": 0.8862705230712891, "logps/chosen": -2.011294364929199, "logps/rejected": -4.215424537658691, "loss": 0.7372, "nll_loss": 0.7127659320831299, "rewards/accuracies": 0.75, "rewards/chosen": -0.20112943649291992, "rewards/margins": 0.2204129993915558, "rewards/rejected": -0.4215424358844757, "step": 2753 }, { "epoch": 7.540041067761807, "grad_norm": 3.0550193786621094, "learning_rate": 6.227397260273973e-07, "log_odds_chosen": 2.422861337661743, "log_odds_ratio": -0.18979085981845856, "logits/chosen": 0.7029383182525635, "logits/rejected": 0.6795897483825684, "logps/chosen": -1.9067974090576172, "logps/rejected": -4.137141704559326, "loss": 0.7726, "nll_loss": 0.7536274194717407, "rewards/accuracies": 1.0, "rewards/chosen": -0.19067975878715515, "rewards/margins": 0.22303442656993866, "rewards/rejected": -0.4137141704559326, "step": 2754 }, { "epoch": 7.542778918548939, "grad_norm": 3.7784583568573, "learning_rate": 6.226027397260274e-07, "log_odds_chosen": 1.09296452999115, "log_odds_ratio": -0.3795231580734253, "logits/chosen": 0.7271826863288879, "logits/rejected": 0.7211266756057739, "logps/chosen": -2.2623329162597656, "logps/rejected": -3.2422385215759277, "loss": 0.6597, "nll_loss": 0.6217345595359802, "rewards/accuracies": 0.875, "rewards/chosen": -0.22623328864574432, "rewards/margins": 0.09799055755138397, "rewards/rejected": -0.3242238461971283, "step": 2755 }, { "epoch": 7.545516769336071, "grad_norm": 6.378973007202148, "learning_rate": 6.224657534246575e-07, "log_odds_chosen": 1.576692819595337, "log_odds_ratio": -0.545036792755127, "logits/chosen": 0.8994117975234985, "logits/rejected": 0.8991413116455078, "logps/chosen": -2.9887704849243164, "logps/rejected": -4.49092960357666, "loss": 0.6885, "nll_loss": 0.6339914798736572, "rewards/accuracies": 0.875, "rewards/chosen": -0.2988770604133606, "rewards/margins": 0.15021592378616333, "rewards/rejected": -0.4490929841995239, "step": 2756 }, { "epoch": 7.5482546201232035, "grad_norm": 3.527435779571533, "learning_rate": 6.223287671232877e-07, "log_odds_chosen": 1.798901915550232, "log_odds_ratio": -0.25742170214653015, "logits/chosen": 0.6651182770729065, "logits/rejected": 0.6155259609222412, "logps/chosen": -1.5652284622192383, "logps/rejected": -3.1361868381500244, "loss": 0.6432, "nll_loss": 0.6174678206443787, "rewards/accuracies": 1.0, "rewards/chosen": -0.15652284026145935, "rewards/margins": 0.15709584951400757, "rewards/rejected": -0.3136187195777893, "step": 2757 }, { "epoch": 7.5509924709103355, "grad_norm": 2.789651393890381, "learning_rate": 6.221917808219178e-07, "log_odds_chosen": 3.913254737854004, "log_odds_ratio": -0.1681443154811859, "logits/chosen": 0.6596197485923767, "logits/rejected": 0.6358352899551392, "logps/chosen": -2.496652841567993, "logps/rejected": -6.309648513793945, "loss": 0.7515, "nll_loss": 0.7347283363342285, "rewards/accuracies": 1.0, "rewards/chosen": -0.2496652901172638, "rewards/margins": 0.38129955530166626, "rewards/rejected": -0.6309648752212524, "step": 2758 }, { "epoch": 7.553730321697468, "grad_norm": 3.2564308643341064, "learning_rate": 6.220547945205479e-07, "log_odds_chosen": 1.3005619049072266, "log_odds_ratio": -0.3566224277019501, "logits/chosen": 0.7364391088485718, "logits/rejected": 0.626369833946228, "logps/chosen": -1.9334098100662231, "logps/rejected": -3.10825514793396, "loss": 0.7021, "nll_loss": 0.6664314270019531, "rewards/accuracies": 0.875, "rewards/chosen": -0.1933409869670868, "rewards/margins": 0.11748454719781876, "rewards/rejected": -0.31082552671432495, "step": 2759 }, { "epoch": 7.5564681724846, "grad_norm": 4.017405986785889, "learning_rate": 6.219178082191781e-07, "log_odds_chosen": 1.563305377960205, "log_odds_ratio": -0.28331074118614197, "logits/chosen": 1.1292251348495483, "logits/rejected": 1.1472669839859009, "logps/chosen": -2.5994961261749268, "logps/rejected": -4.0630669593811035, "loss": 0.6876, "nll_loss": 0.6592764854431152, "rewards/accuracies": 1.0, "rewards/chosen": -0.25994962453842163, "rewards/margins": 0.14635707437992096, "rewards/rejected": -0.4063067138195038, "step": 2760 }, { "epoch": 7.559206023271732, "grad_norm": 2.9014358520507812, "learning_rate": 6.217808219178083e-07, "log_odds_chosen": 3.529726505279541, "log_odds_ratio": -0.3386992812156677, "logits/chosen": 1.026397943496704, "logits/rejected": 1.0018501281738281, "logps/chosen": -1.4322788715362549, "logps/rejected": -4.785510540008545, "loss": 0.6535, "nll_loss": 0.6196337938308716, "rewards/accuracies": 0.75, "rewards/chosen": -0.14322790503501892, "rewards/margins": 0.33532315492630005, "rewards/rejected": -0.47855105996131897, "step": 2761 }, { "epoch": 7.561943874058864, "grad_norm": 3.3919248580932617, "learning_rate": 6.216438356164383e-07, "log_odds_chosen": 2.735008716583252, "log_odds_ratio": -0.27361974120140076, "logits/chosen": 0.8978131413459778, "logits/rejected": 0.9216762185096741, "logps/chosen": -2.183488368988037, "logps/rejected": -4.77304744720459, "loss": 0.7875, "nll_loss": 0.7601253390312195, "rewards/accuracies": 0.75, "rewards/chosen": -0.21834883093833923, "rewards/margins": 0.2589559257030487, "rewards/rejected": -0.47730472683906555, "step": 2762 }, { "epoch": 7.564681724845996, "grad_norm": 3.3871514797210693, "learning_rate": 6.215068493150685e-07, "log_odds_chosen": 3.7937448024749756, "log_odds_ratio": -0.14118662476539612, "logits/chosen": 0.8173900842666626, "logits/rejected": 0.7783003449440002, "logps/chosen": -2.053492307662964, "logps/rejected": -5.6674323081970215, "loss": 0.7421, "nll_loss": 0.7280145883560181, "rewards/accuracies": 1.0, "rewards/chosen": -0.20534923672676086, "rewards/margins": 0.3613939881324768, "rewards/rejected": -0.5667431950569153, "step": 2763 }, { "epoch": 7.567419575633128, "grad_norm": 3.753833293914795, "learning_rate": 6.213698630136987e-07, "log_odds_chosen": 1.2657010555267334, "log_odds_ratio": -0.37866270542144775, "logits/chosen": 0.6634017825126648, "logits/rejected": 0.6276699304580688, "logps/chosen": -1.9500263929367065, "logps/rejected": -3.0926599502563477, "loss": 0.7674, "nll_loss": 0.7295525074005127, "rewards/accuracies": 0.875, "rewards/chosen": -0.19500264525413513, "rewards/margins": 0.11426336318254471, "rewards/rejected": -0.30926600098609924, "step": 2764 }, { "epoch": 7.57015742642026, "grad_norm": 3.0898189544677734, "learning_rate": 6.212328767123287e-07, "log_odds_chosen": 1.6722172498703003, "log_odds_ratio": -0.2955871522426605, "logits/chosen": 0.9178518652915955, "logits/rejected": 0.9377302527427673, "logps/chosen": -1.9073516130447388, "logps/rejected": -3.4243011474609375, "loss": 0.7384, "nll_loss": 0.7088844776153564, "rewards/accuracies": 0.875, "rewards/chosen": -0.19073516130447388, "rewards/margins": 0.15169495344161987, "rewards/rejected": -0.34243011474609375, "step": 2765 }, { "epoch": 7.572895277207392, "grad_norm": 3.248439311981201, "learning_rate": 6.210958904109589e-07, "log_odds_chosen": 1.2929084300994873, "log_odds_ratio": -0.33420491218566895, "logits/chosen": 0.7575592994689941, "logits/rejected": 0.7128075361251831, "logps/chosen": -1.4772238731384277, "logps/rejected": -2.5875117778778076, "loss": 0.6851, "nll_loss": 0.6517155170440674, "rewards/accuracies": 0.875, "rewards/chosen": -0.14772239327430725, "rewards/margins": 0.11102880537509918, "rewards/rejected": -0.2587512135505676, "step": 2766 }, { "epoch": 7.575633127994524, "grad_norm": 3.3968276977539062, "learning_rate": 6.209589041095891e-07, "log_odds_chosen": 2.385633945465088, "log_odds_ratio": -0.3455427289009094, "logits/chosen": 0.8507916927337646, "logits/rejected": 0.7743193507194519, "logps/chosen": -2.852841377258301, "logps/rejected": -5.190000057220459, "loss": 0.7196, "nll_loss": 0.6849977970123291, "rewards/accuracies": 0.75, "rewards/chosen": -0.285284161567688, "rewards/margins": 0.23371586203575134, "rewards/rejected": -0.5190000534057617, "step": 2767 }, { "epoch": 7.578370978781656, "grad_norm": 4.317657947540283, "learning_rate": 6.208219178082191e-07, "log_odds_chosen": 1.646971583366394, "log_odds_ratio": -0.3103918135166168, "logits/chosen": 0.7644034624099731, "logits/rejected": 0.7679199576377869, "logps/chosen": -2.1237130165100098, "logps/rejected": -3.6567347049713135, "loss": 0.6922, "nll_loss": 0.6611824035644531, "rewards/accuracies": 0.875, "rewards/chosen": -0.21237130463123322, "rewards/margins": 0.1533021628856659, "rewards/rejected": -0.3656734824180603, "step": 2768 }, { "epoch": 7.581108829568788, "grad_norm": 3.1659393310546875, "learning_rate": 6.206849315068493e-07, "log_odds_chosen": 1.094308614730835, "log_odds_ratio": -0.3868839144706726, "logits/chosen": 0.7199131846427917, "logits/rejected": 0.7100420594215393, "logps/chosen": -2.1882100105285645, "logps/rejected": -3.185412883758545, "loss": 0.8193, "nll_loss": 0.7805805206298828, "rewards/accuracies": 0.875, "rewards/chosen": -0.2188209891319275, "rewards/margins": 0.09972026944160461, "rewards/rejected": -0.3185412883758545, "step": 2769 }, { "epoch": 7.58384668035592, "grad_norm": 6.476784706115723, "learning_rate": 6.205479452054794e-07, "log_odds_chosen": 3.2091290950775146, "log_odds_ratio": -0.43124833703041077, "logits/chosen": 0.8159080147743225, "logits/rejected": 0.740101158618927, "logps/chosen": -2.2796037197113037, "logps/rejected": -5.355059623718262, "loss": 0.7379, "nll_loss": 0.6948216557502747, "rewards/accuracies": 0.875, "rewards/chosen": -0.22796036303043365, "rewards/margins": 0.30754557251930237, "rewards/rejected": -0.5355059504508972, "step": 2770 }, { "epoch": 7.586584531143052, "grad_norm": 3.00986647605896, "learning_rate": 6.204109589041096e-07, "log_odds_chosen": 4.323798656463623, "log_odds_ratio": -0.21226491034030914, "logits/chosen": 0.8911408185958862, "logits/rejected": 0.9470953941345215, "logps/chosen": -3.086033821105957, "logps/rejected": -7.354731559753418, "loss": 0.8707, "nll_loss": 0.8494846820831299, "rewards/accuracies": 0.75, "rewards/chosen": -0.3086033761501312, "rewards/margins": 0.42686980962753296, "rewards/rejected": -0.7354732155799866, "step": 2771 }, { "epoch": 7.5893223819301845, "grad_norm": 2.9155781269073486, "learning_rate": 6.202739726027397e-07, "log_odds_chosen": 2.0692391395568848, "log_odds_ratio": -0.35435721278190613, "logits/chosen": 0.8915262818336487, "logits/rejected": 0.8542081713676453, "logps/chosen": -1.7513504028320312, "logps/rejected": -3.666652202606201, "loss": 0.7237, "nll_loss": 0.6882795095443726, "rewards/accuracies": 0.875, "rewards/chosen": -0.1751350313425064, "rewards/margins": 0.19153019785881042, "rewards/rejected": -0.36666521430015564, "step": 2772 }, { "epoch": 7.592060232717317, "grad_norm": 3.0171194076538086, "learning_rate": 6.201369863013698e-07, "log_odds_chosen": 2.223938465118408, "log_odds_ratio": -0.24653303623199463, "logits/chosen": 0.5791628360748291, "logits/rejected": 0.45393550395965576, "logps/chosen": -1.511521339416504, "logps/rejected": -3.4939777851104736, "loss": 0.708, "nll_loss": 0.6833409070968628, "rewards/accuracies": 0.875, "rewards/chosen": -0.1511521339416504, "rewards/margins": 0.19824565947055817, "rewards/rejected": -0.34939777851104736, "step": 2773 }, { "epoch": 7.594798083504449, "grad_norm": 3.4378445148468018, "learning_rate": 6.2e-07, "log_odds_chosen": 2.7390213012695312, "log_odds_ratio": -0.15311989188194275, "logits/chosen": 0.9881792068481445, "logits/rejected": 1.0314480066299438, "logps/chosen": -2.223341941833496, "logps/rejected": -4.846782684326172, "loss": 0.763, "nll_loss": 0.7476906180381775, "rewards/accuracies": 1.0, "rewards/chosen": -0.22233420610427856, "rewards/margins": 0.26234403252601624, "rewards/rejected": -0.4846782386302948, "step": 2774 }, { "epoch": 7.597535934291582, "grad_norm": 2.8980109691619873, "learning_rate": 6.198630136986301e-07, "log_odds_chosen": 2.4590718746185303, "log_odds_ratio": -0.28518325090408325, "logits/chosen": 0.6952394247055054, "logits/rejected": 0.5954697132110596, "logps/chosen": -1.3986423015594482, "logps/rejected": -3.6457204818725586, "loss": 0.6992, "nll_loss": 0.670648455619812, "rewards/accuracies": 1.0, "rewards/chosen": -0.1398642361164093, "rewards/margins": 0.22470781207084656, "rewards/rejected": -0.36457204818725586, "step": 2775 }, { "epoch": 7.600273785078713, "grad_norm": 4.196502685546875, "learning_rate": 6.197260273972602e-07, "log_odds_chosen": 1.553532361984253, "log_odds_ratio": -0.5377392768859863, "logits/chosen": 0.7744031548500061, "logits/rejected": 0.7927524447441101, "logps/chosen": -3.0896108150482178, "logps/rejected": -4.576056003570557, "loss": 0.7486, "nll_loss": 0.6948699355125427, "rewards/accuracies": 0.75, "rewards/chosen": -0.30896106362342834, "rewards/margins": 0.14864453673362732, "rewards/rejected": -0.45760560035705566, "step": 2776 }, { "epoch": 7.603011635865846, "grad_norm": 3.803659677505493, "learning_rate": 6.195890410958904e-07, "log_odds_chosen": 1.4648387432098389, "log_odds_ratio": -0.3856792151927948, "logits/chosen": 0.618219256401062, "logits/rejected": 0.5525109767913818, "logps/chosen": -2.135448932647705, "logps/rejected": -3.47678279876709, "loss": 0.7019, "nll_loss": 0.6633325219154358, "rewards/accuracies": 0.875, "rewards/chosen": -0.21354490518569946, "rewards/margins": 0.13413339853286743, "rewards/rejected": -0.3476783037185669, "step": 2777 }, { "epoch": 7.605749486652978, "grad_norm": 3.246828556060791, "learning_rate": 6.194520547945206e-07, "log_odds_chosen": 3.7869491577148438, "log_odds_ratio": -0.09970023483037949, "logits/chosen": 0.8607550263404846, "logits/rejected": 0.794524073600769, "logps/chosen": -1.4330761432647705, "logps/rejected": -4.960057258605957, "loss": 0.6555, "nll_loss": 0.645567774772644, "rewards/accuracies": 1.0, "rewards/chosen": -0.1433076113462448, "rewards/margins": 0.35269808769226074, "rewards/rejected": -0.49600571393966675, "step": 2778 }, { "epoch": 7.60848733744011, "grad_norm": 3.2471680641174316, "learning_rate": 6.193150684931506e-07, "log_odds_chosen": 2.9396910667419434, "log_odds_ratio": -0.16068458557128906, "logits/chosen": 0.7034573554992676, "logits/rejected": 0.7003757953643799, "logps/chosen": -1.9038639068603516, "logps/rejected": -4.69949197769165, "loss": 0.6723, "nll_loss": 0.6562137603759766, "rewards/accuracies": 1.0, "rewards/chosen": -0.19038639962673187, "rewards/margins": 0.2795628309249878, "rewards/rejected": -0.46994921565055847, "step": 2779 }, { "epoch": 7.611225188227242, "grad_norm": 2.9202628135681152, "learning_rate": 6.191780821917808e-07, "log_odds_chosen": 1.663021445274353, "log_odds_ratio": -0.3000863194465637, "logits/chosen": 0.923999547958374, "logits/rejected": 0.9210553169250488, "logps/chosen": -2.2869274616241455, "logps/rejected": -3.848771572113037, "loss": 0.6732, "nll_loss": 0.6431757211685181, "rewards/accuracies": 1.0, "rewards/chosen": -0.22869272530078888, "rewards/margins": 0.15618443489074707, "rewards/rejected": -0.38487717509269714, "step": 2780 }, { "epoch": 7.613963039014374, "grad_norm": 3.4305076599121094, "learning_rate": 6.19041095890411e-07, "log_odds_chosen": 1.7957417964935303, "log_odds_ratio": -0.5275640487670898, "logits/chosen": 0.8580564260482788, "logits/rejected": 0.844363808631897, "logps/chosen": -2.3165197372436523, "logps/rejected": -4.073836803436279, "loss": 0.7675, "nll_loss": 0.7147073149681091, "rewards/accuracies": 0.625, "rewards/chosen": -0.23165196180343628, "rewards/margins": 0.17573168873786926, "rewards/rejected": -0.4073837101459503, "step": 2781 }, { "epoch": 7.616700889801506, "grad_norm": 4.649407386779785, "learning_rate": 6.18904109589041e-07, "log_odds_chosen": 1.225171446800232, "log_odds_ratio": -0.41192710399627686, "logits/chosen": 1.0437328815460205, "logits/rejected": 1.05186128616333, "logps/chosen": -2.4075512886047363, "logps/rejected": -3.4914979934692383, "loss": 0.6494, "nll_loss": 0.6082068085670471, "rewards/accuracies": 0.875, "rewards/chosen": -0.2407551407814026, "rewards/margins": 0.10839468240737915, "rewards/rejected": -0.34914982318878174, "step": 2782 }, { "epoch": 7.619438740588638, "grad_norm": 2.752916097640991, "learning_rate": 6.187671232876712e-07, "log_odds_chosen": 2.2248756885528564, "log_odds_ratio": -0.30032846331596375, "logits/chosen": 0.8213105797767639, "logits/rejected": 0.7567728757858276, "logps/chosen": -1.9840906858444214, "logps/rejected": -4.087954044342041, "loss": 0.6907, "nll_loss": 0.6606665849685669, "rewards/accuracies": 1.0, "rewards/chosen": -0.1984090507030487, "rewards/margins": 0.21038632094860077, "rewards/rejected": -0.40879538655281067, "step": 2783 }, { "epoch": 7.62217659137577, "grad_norm": 3.1498875617980957, "learning_rate": 6.186301369863013e-07, "log_odds_chosen": 1.8100075721740723, "log_odds_ratio": -0.2898137867450714, "logits/chosen": 0.7509281635284424, "logits/rejected": 0.6943134665489197, "logps/chosen": -1.9376943111419678, "logps/rejected": -3.620988368988037, "loss": 0.6167, "nll_loss": 0.5876834392547607, "rewards/accuracies": 1.0, "rewards/chosen": -0.1937694102525711, "rewards/margins": 0.16832943260669708, "rewards/rejected": -0.3620988726615906, "step": 2784 }, { "epoch": 7.624914442162902, "grad_norm": 3.4614920616149902, "learning_rate": 6.184931506849315e-07, "log_odds_chosen": 2.69071626663208, "log_odds_ratio": -0.30402541160583496, "logits/chosen": 0.6356707215309143, "logits/rejected": 0.6083763837814331, "logps/chosen": -2.101717948913574, "logps/rejected": -4.702963352203369, "loss": 0.7343, "nll_loss": 0.7039031982421875, "rewards/accuracies": 1.0, "rewards/chosen": -0.21017178893089294, "rewards/margins": 0.2601246237754822, "rewards/rejected": -0.47029638290405273, "step": 2785 }, { "epoch": 7.627652292950034, "grad_norm": 2.9937899112701416, "learning_rate": 6.183561643835616e-07, "log_odds_chosen": 3.1611011028289795, "log_odds_ratio": -0.1969836950302124, "logits/chosen": 0.7924659252166748, "logits/rejected": 0.7809531688690186, "logps/chosen": -1.9001673460006714, "logps/rejected": -4.913825988769531, "loss": 0.6962, "nll_loss": 0.6765230894088745, "rewards/accuracies": 0.875, "rewards/chosen": -0.1900167167186737, "rewards/margins": 0.3013658821582794, "rewards/rejected": -0.4913825988769531, "step": 2786 }, { "epoch": 7.630390143737166, "grad_norm": 2.9283628463745117, "learning_rate": 6.182191780821917e-07, "log_odds_chosen": 2.2238197326660156, "log_odds_ratio": -0.24453642964363098, "logits/chosen": 0.8512749671936035, "logits/rejected": 0.8881140947341919, "logps/chosen": -2.0086779594421387, "logps/rejected": -4.101117134094238, "loss": 0.6816, "nll_loss": 0.6571140289306641, "rewards/accuracies": 1.0, "rewards/chosen": -0.20086780190467834, "rewards/margins": 0.20924395322799683, "rewards/rejected": -0.41011175513267517, "step": 2787 }, { "epoch": 7.6331279945242985, "grad_norm": 3.3914506435394287, "learning_rate": 6.180821917808219e-07, "log_odds_chosen": 3.572725534439087, "log_odds_ratio": -0.20594413578510284, "logits/chosen": 0.9878506660461426, "logits/rejected": 0.9472401142120361, "logps/chosen": -1.4850050210952759, "logps/rejected": -4.6628241539001465, "loss": 0.5817, "nll_loss": 0.5610811114311218, "rewards/accuracies": 0.875, "rewards/chosen": -0.1485005021095276, "rewards/margins": 0.317781925201416, "rewards/rejected": -0.4662824273109436, "step": 2788 }, { "epoch": 7.6358658453114305, "grad_norm": 3.2504334449768066, "learning_rate": 6.17945205479452e-07, "log_odds_chosen": 2.4561331272125244, "log_odds_ratio": -0.2061942219734192, "logits/chosen": 0.633092999458313, "logits/rejected": 0.6263536810874939, "logps/chosen": -1.8624393939971924, "logps/rejected": -4.159073829650879, "loss": 0.752, "nll_loss": 0.7314005494117737, "rewards/accuracies": 1.0, "rewards/chosen": -0.1862439513206482, "rewards/margins": 0.22966346144676208, "rewards/rejected": -0.41590744256973267, "step": 2789 }, { "epoch": 7.638603696098563, "grad_norm": 3.291159152984619, "learning_rate": 6.178082191780821e-07, "log_odds_chosen": 2.747645854949951, "log_odds_ratio": -0.13943040370941162, "logits/chosen": 0.9053558111190796, "logits/rejected": 0.8896121382713318, "logps/chosen": -1.8129618167877197, "logps/rejected": -4.3743720054626465, "loss": 0.6565, "nll_loss": 0.6425301432609558, "rewards/accuracies": 1.0, "rewards/chosen": -0.1812961846590042, "rewards/margins": 0.2561410069465637, "rewards/rejected": -0.4374372363090515, "step": 2790 }, { "epoch": 7.641341546885695, "grad_norm": 2.984325885772705, "learning_rate": 6.176712328767123e-07, "log_odds_chosen": 1.1907613277435303, "log_odds_ratio": -0.3790474236011505, "logits/chosen": 0.9176660180091858, "logits/rejected": 0.8670732378959656, "logps/chosen": -1.218332290649414, "logps/rejected": -2.243854284286499, "loss": 0.5972, "nll_loss": 0.5592683553695679, "rewards/accuracies": 1.0, "rewards/chosen": -0.12183323502540588, "rewards/margins": 0.10255219787359238, "rewards/rejected": -0.22438544034957886, "step": 2791 }, { "epoch": 7.644079397672827, "grad_norm": 4.917450904846191, "learning_rate": 6.175342465753425e-07, "log_odds_chosen": 0.8585187792778015, "log_odds_ratio": -0.42874792218208313, "logits/chosen": 0.7452744841575623, "logits/rejected": 0.6538764834403992, "logps/chosen": -1.9075528383255005, "logps/rejected": -2.6492576599121094, "loss": 0.7111, "nll_loss": 0.6682416796684265, "rewards/accuracies": 0.75, "rewards/chosen": -0.19075530767440796, "rewards/margins": 0.07417047023773193, "rewards/rejected": -0.2649257779121399, "step": 2792 }, { "epoch": 7.646817248459959, "grad_norm": 2.8977549076080322, "learning_rate": 6.173972602739725e-07, "log_odds_chosen": 2.111152172088623, "log_odds_ratio": -0.18848168849945068, "logits/chosen": 0.7764675617218018, "logits/rejected": 0.6906636953353882, "logps/chosen": -1.5453922748565674, "logps/rejected": -3.41233491897583, "loss": 0.6683, "nll_loss": 0.6494982242584229, "rewards/accuracies": 1.0, "rewards/chosen": -0.15453922748565674, "rewards/margins": 0.18669426441192627, "rewards/rejected": -0.341233491897583, "step": 2793 }, { "epoch": 7.649555099247091, "grad_norm": 2.9479339122772217, "learning_rate": 6.172602739726027e-07, "log_odds_chosen": 3.58525013923645, "log_odds_ratio": -0.13686980307102203, "logits/chosen": 1.0029183626174927, "logits/rejected": 0.9972589612007141, "logps/chosen": -2.0616536140441895, "logps/rejected": -5.480388641357422, "loss": 0.653, "nll_loss": 0.6393303871154785, "rewards/accuracies": 0.875, "rewards/chosen": -0.2061653584241867, "rewards/margins": 0.34187352657318115, "rewards/rejected": -0.548038899898529, "step": 2794 }, { "epoch": 7.652292950034223, "grad_norm": 2.666708469390869, "learning_rate": 6.171232876712329e-07, "log_odds_chosen": 3.1280174255371094, "log_odds_ratio": -0.2481553703546524, "logits/chosen": 0.662311315536499, "logits/rejected": 0.5190697908401489, "logps/chosen": -1.552382230758667, "logps/rejected": -4.525308609008789, "loss": 0.7151, "nll_loss": 0.690260648727417, "rewards/accuracies": 0.875, "rewards/chosen": -0.15523822605609894, "rewards/margins": 0.29729264974594116, "rewards/rejected": -0.4525308609008789, "step": 2795 }, { "epoch": 7.655030800821355, "grad_norm": 8.2881441116333, "learning_rate": 6.169863013698629e-07, "log_odds_chosen": 0.5698837637901306, "log_odds_ratio": -0.7666186094284058, "logits/chosen": 0.891856849193573, "logits/rejected": 0.828475832939148, "logps/chosen": -2.2422094345092773, "logps/rejected": -2.5938310623168945, "loss": 0.8414, "nll_loss": 0.764702558517456, "rewards/accuracies": 0.625, "rewards/chosen": -0.22422096133232117, "rewards/margins": 0.03516218066215515, "rewards/rejected": -0.25938311219215393, "step": 2796 }, { "epoch": 7.657768651608487, "grad_norm": 4.32669734954834, "learning_rate": 6.168493150684931e-07, "log_odds_chosen": 1.0637781620025635, "log_odds_ratio": -0.5245129466056824, "logits/chosen": 0.816970944404602, "logits/rejected": 0.8168612718582153, "logps/chosen": -2.424900531768799, "logps/rejected": -3.389356851577759, "loss": 0.6931, "nll_loss": 0.6406651139259338, "rewards/accuracies": 0.875, "rewards/chosen": -0.24249008297920227, "rewards/margins": 0.09644560515880585, "rewards/rejected": -0.3389357030391693, "step": 2797 }, { "epoch": 7.660506502395619, "grad_norm": 2.7723872661590576, "learning_rate": 6.167123287671233e-07, "log_odds_chosen": 2.0809996128082275, "log_odds_ratio": -0.19609639048576355, "logits/chosen": 0.9316930174827576, "logits/rejected": 0.94202721118927, "logps/chosen": -1.9438459873199463, "logps/rejected": -3.887474536895752, "loss": 0.6538, "nll_loss": 0.6341850757598877, "rewards/accuracies": 1.0, "rewards/chosen": -0.1943846046924591, "rewards/margins": 0.19436286389827728, "rewards/rejected": -0.3887474834918976, "step": 2798 }, { "epoch": 7.663244353182751, "grad_norm": 3.3154330253601074, "learning_rate": 6.165753424657534e-07, "log_odds_chosen": 2.142106056213379, "log_odds_ratio": -0.4823085069656372, "logits/chosen": 0.8532785177230835, "logits/rejected": 0.8758975267410278, "logps/chosen": -2.264491081237793, "logps/rejected": -4.3095479011535645, "loss": 0.7424, "nll_loss": 0.6941817402839661, "rewards/accuracies": 0.625, "rewards/chosen": -0.22644910216331482, "rewards/margins": 0.20450572669506073, "rewards/rejected": -0.43095481395721436, "step": 2799 }, { "epoch": 7.665982203969883, "grad_norm": 2.7701897621154785, "learning_rate": 6.164383561643835e-07, "log_odds_chosen": 3.347579002380371, "log_odds_ratio": -0.24023230373859406, "logits/chosen": 0.9117798805236816, "logits/rejected": 0.8687551021575928, "logps/chosen": -1.7171599864959717, "logps/rejected": -4.913547515869141, "loss": 0.6781, "nll_loss": 0.6540313959121704, "rewards/accuracies": 0.875, "rewards/chosen": -0.17171600461006165, "rewards/margins": 0.31963878870010376, "rewards/rejected": -0.4913548231124878, "step": 2800 }, { "epoch": 7.668720054757015, "grad_norm": 3.8149566650390625, "learning_rate": 6.163013698630136e-07, "log_odds_chosen": 1.4589736461639404, "log_odds_ratio": -0.4048336446285248, "logits/chosen": 0.8212082386016846, "logits/rejected": 0.7123391628265381, "logps/chosen": -1.7083590030670166, "logps/rejected": -3.069827079772949, "loss": 0.7369, "nll_loss": 0.6963709592819214, "rewards/accuracies": 1.0, "rewards/chosen": -0.17083591222763062, "rewards/margins": 0.13614682853221893, "rewards/rejected": -0.30698275566101074, "step": 2801 }, { "epoch": 7.671457905544148, "grad_norm": 3.2701187133789062, "learning_rate": 6.161643835616438e-07, "log_odds_chosen": 3.5421319007873535, "log_odds_ratio": -0.29567140340805054, "logits/chosen": 0.685943603515625, "logits/rejected": 0.5296372175216675, "logps/chosen": -1.7636265754699707, "logps/rejected": -5.0985612869262695, "loss": 0.7082, "nll_loss": 0.6786584258079529, "rewards/accuracies": 0.875, "rewards/chosen": -0.17636266350746155, "rewards/margins": 0.3334934115409851, "rewards/rejected": -0.509856104850769, "step": 2802 }, { "epoch": 7.6741957563312795, "grad_norm": 4.3142924308776855, "learning_rate": 6.160273972602739e-07, "log_odds_chosen": 2.3760898113250732, "log_odds_ratio": -0.25929778814315796, "logits/chosen": 1.0569486618041992, "logits/rejected": 1.1018931865692139, "logps/chosen": -2.977125644683838, "logps/rejected": -5.225790500640869, "loss": 0.7201, "nll_loss": 0.6941899061203003, "rewards/accuracies": 0.75, "rewards/chosen": -0.2977125644683838, "rewards/margins": 0.22486647963523865, "rewards/rejected": -0.5225790143013, "step": 2803 }, { "epoch": 7.676933607118412, "grad_norm": 4.427637100219727, "learning_rate": 6.15890410958904e-07, "log_odds_chosen": 1.3348033428192139, "log_odds_ratio": -0.5352680683135986, "logits/chosen": 0.931739330291748, "logits/rejected": 0.9246041774749756, "logps/chosen": -1.9300055503845215, "logps/rejected": -3.1729283332824707, "loss": 0.7149, "nll_loss": 0.6613778471946716, "rewards/accuracies": 0.75, "rewards/chosen": -0.19300056993961334, "rewards/margins": 0.1242922693490982, "rewards/rejected": -0.31729283928871155, "step": 2804 }, { "epoch": 7.6796714579055445, "grad_norm": 2.9181861877441406, "learning_rate": 6.157534246575342e-07, "log_odds_chosen": 3.281540632247925, "log_odds_ratio": -0.20301853120326996, "logits/chosen": 0.8455173969268799, "logits/rejected": 0.8143371343612671, "logps/chosen": -1.7160511016845703, "logps/rejected": -4.836134433746338, "loss": 0.6903, "nll_loss": 0.670043408870697, "rewards/accuracies": 1.0, "rewards/chosen": -0.17160511016845703, "rewards/margins": 0.3120083808898926, "rewards/rejected": -0.4836134910583496, "step": 2805 }, { "epoch": 7.682409308692677, "grad_norm": 3.094935894012451, "learning_rate": 6.156164383561644e-07, "log_odds_chosen": 4.361837863922119, "log_odds_ratio": -0.12695620954036713, "logits/chosen": 0.7309374809265137, "logits/rejected": 0.7687819600105286, "logps/chosen": -2.1163012981414795, "logps/rejected": -6.326905727386475, "loss": 0.7083, "nll_loss": 0.6955821514129639, "rewards/accuracies": 1.0, "rewards/chosen": -0.21163012087345123, "rewards/margins": 0.4210604429244995, "rewards/rejected": -0.6326905488967896, "step": 2806 }, { "epoch": 7.685147159479809, "grad_norm": 2.9243662357330322, "learning_rate": 6.154794520547944e-07, "log_odds_chosen": 2.8974101543426514, "log_odds_ratio": -0.269027441740036, "logits/chosen": 0.6206880211830139, "logits/rejected": 0.5677233934402466, "logps/chosen": -1.667441487312317, "logps/rejected": -4.428137302398682, "loss": 0.6696, "nll_loss": 0.6427303552627563, "rewards/accuracies": 0.75, "rewards/chosen": -0.1667441427707672, "rewards/margins": 0.2760695815086365, "rewards/rejected": -0.4428137540817261, "step": 2807 }, { "epoch": 7.687885010266941, "grad_norm": 3.375214099884033, "learning_rate": 6.153424657534246e-07, "log_odds_chosen": 1.0550180673599243, "log_odds_ratio": -0.34379082918167114, "logits/chosen": 0.8983776569366455, "logits/rejected": 0.9062886238098145, "logps/chosen": -1.8391870260238647, "logps/rejected": -2.7713141441345215, "loss": 0.7034, "nll_loss": 0.6689901947975159, "rewards/accuracies": 1.0, "rewards/chosen": -0.18391871452331543, "rewards/margins": 0.09321272373199463, "rewards/rejected": -0.27713143825531006, "step": 2808 }, { "epoch": 7.690622861054073, "grad_norm": 2.9034030437469482, "learning_rate": 6.152054794520548e-07, "log_odds_chosen": 3.833381414413452, "log_odds_ratio": -0.15743279457092285, "logits/chosen": 0.6932547092437744, "logits/rejected": 0.6547428965568542, "logps/chosen": -1.6982204914093018, "logps/rejected": -5.347899436950684, "loss": 0.7107, "nll_loss": 0.6950020790100098, "rewards/accuracies": 1.0, "rewards/chosen": -0.16982205212116241, "rewards/margins": 0.3649679124355316, "rewards/rejected": -0.5347899794578552, "step": 2809 }, { "epoch": 7.693360711841205, "grad_norm": 3.7641894817352295, "learning_rate": 6.150684931506848e-07, "log_odds_chosen": 2.272825241088867, "log_odds_ratio": -0.27084535360336304, "logits/chosen": 0.6699808239936829, "logits/rejected": 0.6273860335350037, "logps/chosen": -1.1909828186035156, "logps/rejected": -3.1687512397766113, "loss": 0.5725, "nll_loss": 0.5453736186027527, "rewards/accuracies": 0.875, "rewards/chosen": -0.11909827589988708, "rewards/margins": 0.19777682423591614, "rewards/rejected": -0.3168751299381256, "step": 2810 }, { "epoch": 7.696098562628337, "grad_norm": 5.114238739013672, "learning_rate": 6.14931506849315e-07, "log_odds_chosen": 2.661649227142334, "log_odds_ratio": -0.25437331199645996, "logits/chosen": 0.9477624297142029, "logits/rejected": 0.9339162707328796, "logps/chosen": -1.7272119522094727, "logps/rejected": -4.256926536560059, "loss": 0.6423, "nll_loss": 0.6168245673179626, "rewards/accuracies": 1.0, "rewards/chosen": -0.17272120714187622, "rewards/margins": 0.25297150015830994, "rewards/rejected": -0.42569267749786377, "step": 2811 }, { "epoch": 7.698836413415469, "grad_norm": 3.291905641555786, "learning_rate": 6.147945205479452e-07, "log_odds_chosen": 1.4667019844055176, "log_odds_ratio": -0.4792943298816681, "logits/chosen": 1.1911622285842896, "logits/rejected": 1.2214767932891846, "logps/chosen": -2.9163904190063477, "logps/rejected": -4.3065690994262695, "loss": 0.732, "nll_loss": 0.684051513671875, "rewards/accuracies": 0.75, "rewards/chosen": -0.2916390597820282, "rewards/margins": 0.13901783525943756, "rewards/rejected": -0.4306568503379822, "step": 2812 }, { "epoch": 7.701574264202601, "grad_norm": 3.04443097114563, "learning_rate": 6.146575342465753e-07, "log_odds_chosen": 2.574878454208374, "log_odds_ratio": -0.1912335604429245, "logits/chosen": 1.048840045928955, "logits/rejected": 1.0833088159561157, "logps/chosen": -2.1345717906951904, "logps/rejected": -4.580016613006592, "loss": 0.6661, "nll_loss": 0.6469378471374512, "rewards/accuracies": 1.0, "rewards/chosen": -0.21345719695091248, "rewards/margins": 0.24454449117183685, "rewards/rejected": -0.4580017030239105, "step": 2813 }, { "epoch": 7.704312114989733, "grad_norm": 3.352654218673706, "learning_rate": 6.145205479452054e-07, "log_odds_chosen": 3.8162598609924316, "log_odds_ratio": -0.039692461490631104, "logits/chosen": 1.0027185678482056, "logits/rejected": 1.046122670173645, "logps/chosen": -3.061343193054199, "logps/rejected": -6.6706037521362305, "loss": 0.8227, "nll_loss": 0.8187593221664429, "rewards/accuracies": 1.0, "rewards/chosen": -0.30613434314727783, "rewards/margins": 0.3609260320663452, "rewards/rejected": -0.667060375213623, "step": 2814 }, { "epoch": 7.707049965776865, "grad_norm": 3.844374895095825, "learning_rate": 6.143835616438355e-07, "log_odds_chosen": 1.7108358144760132, "log_odds_ratio": -0.4101852774620056, "logits/chosen": 0.6157025098800659, "logits/rejected": 0.5522124767303467, "logps/chosen": -2.3700737953186035, "logps/rejected": -4.006428241729736, "loss": 0.6686, "nll_loss": 0.6275748610496521, "rewards/accuracies": 0.75, "rewards/chosen": -0.23700737953186035, "rewards/margins": 0.16363543272018433, "rewards/rejected": -0.4006428122520447, "step": 2815 }, { "epoch": 7.709787816563997, "grad_norm": 4.380235195159912, "learning_rate": 6.142465753424657e-07, "log_odds_chosen": 3.175987958908081, "log_odds_ratio": -0.35466986894607544, "logits/chosen": 0.9660009741783142, "logits/rejected": 0.9570024013519287, "logps/chosen": -1.6377556324005127, "logps/rejected": -4.555420875549316, "loss": 0.6785, "nll_loss": 0.6430793404579163, "rewards/accuracies": 0.875, "rewards/chosen": -0.16377554833889008, "rewards/margins": 0.291766494512558, "rewards/rejected": -0.45554202795028687, "step": 2816 }, { "epoch": 7.712525667351129, "grad_norm": 3.061140537261963, "learning_rate": 6.141095890410958e-07, "log_odds_chosen": 1.3634417057037354, "log_odds_ratio": -0.316674143075943, "logits/chosen": 0.7030156254768372, "logits/rejected": 0.5729702711105347, "logps/chosen": -1.874216079711914, "logps/rejected": -3.132507801055908, "loss": 0.7735, "nll_loss": 0.7417833209037781, "rewards/accuracies": 1.0, "rewards/chosen": -0.18742161989212036, "rewards/margins": 0.12582914531230927, "rewards/rejected": -0.31325075030326843, "step": 2817 }, { "epoch": 7.715263518138261, "grad_norm": 4.269467353820801, "learning_rate": 6.139726027397259e-07, "log_odds_chosen": 1.9733741283416748, "log_odds_ratio": -0.4244288206100464, "logits/chosen": 1.110041618347168, "logits/rejected": 1.1529873609542847, "logps/chosen": -2.341583728790283, "logps/rejected": -4.179566860198975, "loss": 0.6914, "nll_loss": 0.6489778161048889, "rewards/accuracies": 0.75, "rewards/chosen": -0.23415836691856384, "rewards/margins": 0.18379831314086914, "rewards/rejected": -0.4179566502571106, "step": 2818 }, { "epoch": 7.7180013689253935, "grad_norm": 5.432999610900879, "learning_rate": 6.138356164383561e-07, "log_odds_chosen": 1.3242802619934082, "log_odds_ratio": -0.49262869358062744, "logits/chosen": 0.5753412842750549, "logits/rejected": 0.5517693758010864, "logps/chosen": -2.1774935722351074, "logps/rejected": -3.26593279838562, "loss": 0.6895, "nll_loss": 0.640277087688446, "rewards/accuracies": 0.875, "rewards/chosen": -0.21774938702583313, "rewards/margins": 0.10884392261505127, "rewards/rejected": -0.3265933096408844, "step": 2819 }, { "epoch": 7.7207392197125255, "grad_norm": 3.538774013519287, "learning_rate": 6.136986301369864e-07, "log_odds_chosen": 1.3335505723953247, "log_odds_ratio": -0.3577752709388733, "logits/chosen": 1.0982304811477661, "logits/rejected": 1.1308242082595825, "logps/chosen": -2.438783645629883, "logps/rejected": -3.7034049034118652, "loss": 0.6849, "nll_loss": 0.6491069197654724, "rewards/accuracies": 0.875, "rewards/chosen": -0.24387836456298828, "rewards/margins": 0.1264621466398239, "rewards/rejected": -0.370340496301651, "step": 2820 }, { "epoch": 7.723477070499658, "grad_norm": 2.92634654045105, "learning_rate": 6.135616438356163e-07, "log_odds_chosen": 2.913027286529541, "log_odds_ratio": -0.17639756202697754, "logits/chosen": 0.8797733783721924, "logits/rejected": 0.8680914640426636, "logps/chosen": -2.609936475753784, "logps/rejected": -5.409150123596191, "loss": 0.7054, "nll_loss": 0.6877968311309814, "rewards/accuracies": 1.0, "rewards/chosen": -0.2609936594963074, "rewards/margins": 0.27992135286331177, "rewards/rejected": -0.5409150123596191, "step": 2821 }, { "epoch": 7.72621492128679, "grad_norm": 7.250729560852051, "learning_rate": 6.134246575342466e-07, "log_odds_chosen": 1.6140894889831543, "log_odds_ratio": -0.5856807827949524, "logits/chosen": 0.919479250907898, "logits/rejected": 0.8815566301345825, "logps/chosen": -2.125199794769287, "logps/rejected": -3.630016803741455, "loss": 0.7773, "nll_loss": 0.7187535762786865, "rewards/accuracies": 0.875, "rewards/chosen": -0.21252000331878662, "rewards/margins": 0.150481715798378, "rewards/rejected": -0.3630017042160034, "step": 2822 }, { "epoch": 7.728952772073922, "grad_norm": 3.4299564361572266, "learning_rate": 6.132876712328768e-07, "log_odds_chosen": 2.758430004119873, "log_odds_ratio": -0.11948245763778687, "logits/chosen": 1.132214069366455, "logits/rejected": 1.168530821800232, "logps/chosen": -2.4583940505981445, "logps/rejected": -5.099872589111328, "loss": 0.6585, "nll_loss": 0.6465989947319031, "rewards/accuracies": 1.0, "rewards/chosen": -0.24583940207958221, "rewards/margins": 0.2641478478908539, "rewards/rejected": -0.5099872350692749, "step": 2823 }, { "epoch": 7.731690622861054, "grad_norm": 2.7871885299682617, "learning_rate": 6.131506849315067e-07, "log_odds_chosen": 2.1140007972717285, "log_odds_ratio": -0.24975332617759705, "logits/chosen": 0.8910670280456543, "logits/rejected": 0.9035282135009766, "logps/chosen": -1.877669095993042, "logps/rejected": -3.8197145462036133, "loss": 0.5852, "nll_loss": 0.560202956199646, "rewards/accuracies": 0.875, "rewards/chosen": -0.1877669095993042, "rewards/margins": 0.19420453906059265, "rewards/rejected": -0.38197144865989685, "step": 2824 }, { "epoch": 7.734428473648186, "grad_norm": 3.638561248779297, "learning_rate": 6.13013698630137e-07, "log_odds_chosen": 4.103028774261475, "log_odds_ratio": -0.18610379099845886, "logits/chosen": 1.140085220336914, "logits/rejected": 1.1751796007156372, "logps/chosen": -1.9931825399398804, "logps/rejected": -5.936967849731445, "loss": 0.6585, "nll_loss": 0.6398683190345764, "rewards/accuracies": 0.875, "rewards/chosen": -0.19931824505329132, "rewards/margins": 0.39437854290008545, "rewards/rejected": -0.5936967730522156, "step": 2825 }, { "epoch": 7.737166324435318, "grad_norm": 3.270117998123169, "learning_rate": 6.128767123287672e-07, "log_odds_chosen": 2.091801404953003, "log_odds_ratio": -0.2889987826347351, "logits/chosen": 1.256701946258545, "logits/rejected": 1.264710783958435, "logps/chosen": -2.304368257522583, "logps/rejected": -4.288051605224609, "loss": 0.6369, "nll_loss": 0.6080471277236938, "rewards/accuracies": 0.75, "rewards/chosen": -0.23043683171272278, "rewards/margins": 0.19836832582950592, "rewards/rejected": -0.4288051724433899, "step": 2826 }, { "epoch": 7.73990417522245, "grad_norm": 3.586280107498169, "learning_rate": 6.127397260273973e-07, "log_odds_chosen": 2.6679112911224365, "log_odds_ratio": -0.20621314644813538, "logits/chosen": 0.7219635248184204, "logits/rejected": 0.6730395555496216, "logps/chosen": -2.2309048175811768, "logps/rejected": -4.713780403137207, "loss": 0.7096, "nll_loss": 0.688971996307373, "rewards/accuracies": 0.875, "rewards/chosen": -0.22309048473834991, "rewards/margins": 0.24828755855560303, "rewards/rejected": -0.47137802839279175, "step": 2827 }, { "epoch": 7.742642026009582, "grad_norm": 4.528294086456299, "learning_rate": 6.126027397260274e-07, "log_odds_chosen": 1.9597632884979248, "log_odds_ratio": -0.21237283945083618, "logits/chosen": 0.8309427499771118, "logits/rejected": 0.8476783633232117, "logps/chosen": -2.405785322189331, "logps/rejected": -4.2596540451049805, "loss": 0.7618, "nll_loss": 0.7405242323875427, "rewards/accuracies": 1.0, "rewards/chosen": -0.2405785322189331, "rewards/margins": 0.18538689613342285, "rewards/rejected": -0.42596542835235596, "step": 2828 }, { "epoch": 7.745379876796715, "grad_norm": 3.0607147216796875, "learning_rate": 6.124657534246576e-07, "log_odds_chosen": 2.128304958343506, "log_odds_ratio": -0.22471407055854797, "logits/chosen": 1.0057692527770996, "logits/rejected": 0.9696390628814697, "logps/chosen": -1.9004253149032593, "logps/rejected": -3.8991827964782715, "loss": 0.7008, "nll_loss": 0.6783750057220459, "rewards/accuracies": 1.0, "rewards/chosen": -0.19004252552986145, "rewards/margins": 0.19987574219703674, "rewards/rejected": -0.3899182677268982, "step": 2829 }, { "epoch": 7.748117727583846, "grad_norm": 4.76981258392334, "learning_rate": 6.123287671232877e-07, "log_odds_chosen": 0.9225220680236816, "log_odds_ratio": -0.5352537631988525, "logits/chosen": 0.6479034423828125, "logits/rejected": 0.6486692428588867, "logps/chosen": -2.4937996864318848, "logps/rejected": -3.335702419281006, "loss": 0.8491, "nll_loss": 0.7955538034439087, "rewards/accuracies": 0.625, "rewards/chosen": -0.2493799477815628, "rewards/margins": 0.08419029414653778, "rewards/rejected": -0.3335702419281006, "step": 2830 }, { "epoch": 7.750855578370979, "grad_norm": 3.053473472595215, "learning_rate": 6.121917808219178e-07, "log_odds_chosen": 2.523477792739868, "log_odds_ratio": -0.21598650515079498, "logits/chosen": 0.7540157437324524, "logits/rejected": 0.6667412519454956, "logps/chosen": -1.7900173664093018, "logps/rejected": -4.008193492889404, "loss": 0.7242, "nll_loss": 0.702642560005188, "rewards/accuracies": 1.0, "rewards/chosen": -0.17900171875953674, "rewards/margins": 0.22181761264801025, "rewards/rejected": -0.4008193612098694, "step": 2831 }, { "epoch": 7.753593429158111, "grad_norm": 2.720430850982666, "learning_rate": 6.120547945205479e-07, "log_odds_chosen": 3.07780122756958, "log_odds_ratio": -0.16164734959602356, "logits/chosen": 0.716134786605835, "logits/rejected": 0.6585611701011658, "logps/chosen": -1.6748595237731934, "logps/rejected": -4.550278186798096, "loss": 0.6789, "nll_loss": 0.6627275347709656, "rewards/accuracies": 1.0, "rewards/chosen": -0.16748595237731934, "rewards/margins": 0.2875418961048126, "rewards/rejected": -0.45502781867980957, "step": 2832 }, { "epoch": 7.756331279945243, "grad_norm": 3.0798819065093994, "learning_rate": 6.119178082191781e-07, "log_odds_chosen": 2.834639549255371, "log_odds_ratio": -0.2442244291305542, "logits/chosen": 0.7816736698150635, "logits/rejected": 0.7160141468048096, "logps/chosen": -2.3351798057556152, "logps/rejected": -5.089679718017578, "loss": 0.7393, "nll_loss": 0.7149055004119873, "rewards/accuracies": 0.875, "rewards/chosen": -0.23351797461509705, "rewards/margins": 0.2754500210285187, "rewards/rejected": -0.508967936038971, "step": 2833 }, { "epoch": 7.759069130732375, "grad_norm": 2.635484457015991, "learning_rate": 6.117808219178083e-07, "log_odds_chosen": 2.5503625869750977, "log_odds_ratio": -0.17149202525615692, "logits/chosen": 0.7999939322471619, "logits/rejected": 0.8083422183990479, "logps/chosen": -1.7338826656341553, "logps/rejected": -4.113013744354248, "loss": 0.7561, "nll_loss": 0.7389147281646729, "rewards/accuracies": 1.0, "rewards/chosen": -0.17338827252388, "rewards/margins": 0.2379131317138672, "rewards/rejected": -0.4113013744354248, "step": 2834 }, { "epoch": 7.761806981519507, "grad_norm": 3.390549659729004, "learning_rate": 6.116438356164383e-07, "log_odds_chosen": 3.254143476486206, "log_odds_ratio": -0.29068487882614136, "logits/chosen": 0.919116735458374, "logits/rejected": 0.9202496409416199, "logps/chosen": -1.7351791858673096, "logps/rejected": -4.848865509033203, "loss": 0.6656, "nll_loss": 0.6365185976028442, "rewards/accuracies": 1.0, "rewards/chosen": -0.17351791262626648, "rewards/margins": 0.3113686442375183, "rewards/rejected": -0.4848865568637848, "step": 2835 }, { "epoch": 7.7645448323066395, "grad_norm": 3.1425764560699463, "learning_rate": 6.115068493150685e-07, "log_odds_chosen": 2.477041482925415, "log_odds_ratio": -0.20940780639648438, "logits/chosen": 1.192681074142456, "logits/rejected": 1.2312254905700684, "logps/chosen": -2.1884098052978516, "logps/rejected": -4.478932857513428, "loss": 0.6155, "nll_loss": 0.5945844054222107, "rewards/accuracies": 0.875, "rewards/chosen": -0.21884098649024963, "rewards/margins": 0.22905229032039642, "rewards/rejected": -0.44789332151412964, "step": 2836 }, { "epoch": 7.767282683093772, "grad_norm": 3.5586788654327393, "learning_rate": 6.113698630136987e-07, "log_odds_chosen": 3.2987656593322754, "log_odds_ratio": -0.09313967078924179, "logits/chosen": 1.0269254446029663, "logits/rejected": 1.0720763206481934, "logps/chosen": -2.158097743988037, "logps/rejected": -5.25752067565918, "loss": 0.6721, "nll_loss": 0.6627534627914429, "rewards/accuracies": 1.0, "rewards/chosen": -0.21580976247787476, "rewards/margins": 0.3099423050880432, "rewards/rejected": -0.525752067565918, "step": 2837 }, { "epoch": 7.770020533880904, "grad_norm": 3.6395814418792725, "learning_rate": 6.112328767123287e-07, "log_odds_chosen": 0.9278074502944946, "log_odds_ratio": -0.4115995466709137, "logits/chosen": 0.8897314071655273, "logits/rejected": 0.910820484161377, "logps/chosen": -2.2739157676696777, "logps/rejected": -3.1080241203308105, "loss": 0.6752, "nll_loss": 0.6339919567108154, "rewards/accuracies": 0.75, "rewards/chosen": -0.2273915708065033, "rewards/margins": 0.0834108218550682, "rewards/rejected": -0.3108024001121521, "step": 2838 }, { "epoch": 7.772758384668036, "grad_norm": 3.173413038253784, "learning_rate": 6.110958904109589e-07, "log_odds_chosen": 2.958303928375244, "log_odds_ratio": -0.20394521951675415, "logits/chosen": 1.1303037405014038, "logits/rejected": 1.1307201385498047, "logps/chosen": -1.7152413129806519, "logps/rejected": -4.507542610168457, "loss": 0.5771, "nll_loss": 0.5567302107810974, "rewards/accuracies": 1.0, "rewards/chosen": -0.17152413725852966, "rewards/margins": 0.27923017740249634, "rewards/rejected": -0.4507542848587036, "step": 2839 }, { "epoch": 7.775496235455168, "grad_norm": 3.64170241355896, "learning_rate": 6.109589041095891e-07, "log_odds_chosen": 2.6483240127563477, "log_odds_ratio": -0.21201163530349731, "logits/chosen": 0.8583524227142334, "logits/rejected": 0.895156979560852, "logps/chosen": -2.5790750980377197, "logps/rejected": -5.070718765258789, "loss": 0.6912, "nll_loss": 0.6700119972229004, "rewards/accuracies": 0.875, "rewards/chosen": -0.257907509803772, "rewards/margins": 0.2491643726825714, "rewards/rejected": -0.507071852684021, "step": 2840 }, { "epoch": 7.7782340862423, "grad_norm": 3.278287410736084, "learning_rate": 6.108219178082191e-07, "log_odds_chosen": 2.129389524459839, "log_odds_ratio": -0.33410483598709106, "logits/chosen": 0.7879326939582825, "logits/rejected": 0.7742682695388794, "logps/chosen": -2.1541194915771484, "logps/rejected": -4.21608829498291, "loss": 0.6702, "nll_loss": 0.636774480342865, "rewards/accuracies": 0.875, "rewards/chosen": -0.2154119610786438, "rewards/margins": 0.2061968296766281, "rewards/rejected": -0.4216088056564331, "step": 2841 }, { "epoch": 7.780971937029432, "grad_norm": 5.100530624389648, "learning_rate": 6.106849315068493e-07, "log_odds_chosen": 2.7451436519622803, "log_odds_ratio": -0.23722991347312927, "logits/chosen": 0.8485403060913086, "logits/rejected": 0.8146287202835083, "logps/chosen": -2.239895820617676, "logps/rejected": -4.854364395141602, "loss": 0.7335, "nll_loss": 0.7097842693328857, "rewards/accuracies": 0.875, "rewards/chosen": -0.2239895910024643, "rewards/margins": 0.2614468038082123, "rewards/rejected": -0.48543643951416016, "step": 2842 }, { "epoch": 7.783709787816564, "grad_norm": 2.981658935546875, "learning_rate": 6.105479452054795e-07, "log_odds_chosen": 2.4254260063171387, "log_odds_ratio": -0.13877525925636292, "logits/chosen": 0.7878988981246948, "logits/rejected": 0.8006530404090881, "logps/chosen": -2.3383071422576904, "logps/rejected": -4.629848957061768, "loss": 0.7208, "nll_loss": 0.7068790793418884, "rewards/accuracies": 1.0, "rewards/chosen": -0.23383072018623352, "rewards/margins": 0.22915419936180115, "rewards/rejected": -0.46298491954803467, "step": 2843 }, { "epoch": 7.786447638603696, "grad_norm": 3.382786273956299, "learning_rate": 6.104109589041096e-07, "log_odds_chosen": 1.6138781309127808, "log_odds_ratio": -0.26613447070121765, "logits/chosen": 0.8386645913124084, "logits/rejected": 0.8342738747596741, "logps/chosen": -1.3883564472198486, "logps/rejected": -2.7865617275238037, "loss": 0.6318, "nll_loss": 0.6052361726760864, "rewards/accuracies": 1.0, "rewards/chosen": -0.13883563876152039, "rewards/margins": 0.13982053101062775, "rewards/rejected": -0.2786561846733093, "step": 2844 }, { "epoch": 7.789185489390828, "grad_norm": 3.158975839614868, "learning_rate": 6.102739726027397e-07, "log_odds_chosen": 2.1609482765197754, "log_odds_ratio": -0.22733621299266815, "logits/chosen": 0.7631126642227173, "logits/rejected": 0.7619091272354126, "logps/chosen": -2.6631550788879395, "logps/rejected": -4.626144886016846, "loss": 0.654, "nll_loss": 0.6312410831451416, "rewards/accuracies": 1.0, "rewards/chosen": -0.2663155198097229, "rewards/margins": 0.1962989866733551, "rewards/rejected": -0.462614506483078, "step": 2845 }, { "epoch": 7.79192334017796, "grad_norm": 3.6293365955352783, "learning_rate": 6.101369863013698e-07, "log_odds_chosen": 1.6617891788482666, "log_odds_ratio": -0.19386669993400574, "logits/chosen": 0.7232269048690796, "logits/rejected": 0.6244247555732727, "logps/chosen": -1.5650038719177246, "logps/rejected": -3.005251884460449, "loss": 0.6687, "nll_loss": 0.6493616104125977, "rewards/accuracies": 1.0, "rewards/chosen": -0.15650039911270142, "rewards/margins": 0.1440248042345047, "rewards/rejected": -0.3005251884460449, "step": 2846 }, { "epoch": 7.794661190965092, "grad_norm": 4.040412425994873, "learning_rate": 6.1e-07, "log_odds_chosen": 0.9809743165969849, "log_odds_ratio": -0.46358177065849304, "logits/chosen": 0.618066668510437, "logits/rejected": 0.5927374362945557, "logps/chosen": -2.12003493309021, "logps/rejected": -3.043041229248047, "loss": 0.6725, "nll_loss": 0.6261193156242371, "rewards/accuracies": 0.75, "rewards/chosen": -0.21200352907180786, "rewards/margins": 0.09230063110589981, "rewards/rejected": -0.3043041527271271, "step": 2847 }, { "epoch": 7.797399041752224, "grad_norm": 6.517998218536377, "learning_rate": 6.098630136986302e-07, "log_odds_chosen": 1.7651097774505615, "log_odds_ratio": -0.7594026327133179, "logits/chosen": 0.8005982637405396, "logits/rejected": 0.9295510649681091, "logps/chosen": -2.5580883026123047, "logps/rejected": -4.298958778381348, "loss": 0.817, "nll_loss": 0.7410140037536621, "rewards/accuracies": 0.75, "rewards/chosen": -0.25580886006355286, "rewards/margins": 0.1740870177745819, "rewards/rejected": -0.42989587783813477, "step": 2848 }, { "epoch": 7.800136892539356, "grad_norm": 3.3763375282287598, "learning_rate": 6.097260273972602e-07, "log_odds_chosen": 1.1654622554779053, "log_odds_ratio": -0.39318040013313293, "logits/chosen": 0.8325339555740356, "logits/rejected": 0.8121169209480286, "logps/chosen": -1.7166728973388672, "logps/rejected": -2.771763801574707, "loss": 0.6198, "nll_loss": 0.5805094242095947, "rewards/accuracies": 1.0, "rewards/chosen": -0.17166729271411896, "rewards/margins": 0.10550907254219055, "rewards/rejected": -0.2771763801574707, "step": 2849 }, { "epoch": 7.8028747433264884, "grad_norm": 3.090005397796631, "learning_rate": 6.095890410958904e-07, "log_odds_chosen": 3.2096428871154785, "log_odds_ratio": -0.16244812309741974, "logits/chosen": 0.972500205039978, "logits/rejected": 0.9542115926742554, "logps/chosen": -2.101712226867676, "logps/rejected": -5.092197895050049, "loss": 0.6976, "nll_loss": 0.6813675165176392, "rewards/accuracies": 1.0, "rewards/chosen": -0.21017125248908997, "rewards/margins": 0.2990485727787018, "rewards/rejected": -0.5092198252677917, "step": 2850 }, { "epoch": 7.8056125941136205, "grad_norm": 3.505840539932251, "learning_rate": 6.094520547945206e-07, "log_odds_chosen": 1.3313164710998535, "log_odds_ratio": -0.29420897364616394, "logits/chosen": 1.120755910873413, "logits/rejected": 1.1223976612091064, "logps/chosen": -1.8610498905181885, "logps/rejected": -3.044739007949829, "loss": 0.6313, "nll_loss": 0.6018397808074951, "rewards/accuracies": 1.0, "rewards/chosen": -0.18610498309135437, "rewards/margins": 0.11836890876293182, "rewards/rejected": -0.3044739067554474, "step": 2851 }, { "epoch": 7.808350444900753, "grad_norm": 2.9031729698181152, "learning_rate": 6.093150684931506e-07, "log_odds_chosen": 2.61114501953125, "log_odds_ratio": -0.1952425241470337, "logits/chosen": 0.7019756436347961, "logits/rejected": 0.6648038029670715, "logps/chosen": -1.6364119052886963, "logps/rejected": -4.018954753875732, "loss": 0.6414, "nll_loss": 0.6218942403793335, "rewards/accuracies": 1.0, "rewards/chosen": -0.16364118456840515, "rewards/margins": 0.23825430870056152, "rewards/rejected": -0.4018954932689667, "step": 2852 }, { "epoch": 7.811088295687885, "grad_norm": 3.054487943649292, "learning_rate": 6.091780821917808e-07, "log_odds_chosen": 3.0545525550842285, "log_odds_ratio": -0.1780003011226654, "logits/chosen": 0.6631912589073181, "logits/rejected": 0.5617998838424683, "logps/chosen": -1.770527958869934, "logps/rejected": -4.639972686767578, "loss": 0.7079, "nll_loss": 0.6900507807731628, "rewards/accuracies": 1.0, "rewards/chosen": -0.1770528107881546, "rewards/margins": 0.2869444489479065, "rewards/rejected": -0.4639972448348999, "step": 2853 }, { "epoch": 7.813826146475018, "grad_norm": 3.0348761081695557, "learning_rate": 6.09041095890411e-07, "log_odds_chosen": 2.9216628074645996, "log_odds_ratio": -0.26536545157432556, "logits/chosen": 0.8319518566131592, "logits/rejected": 0.8352662324905396, "logps/chosen": -2.3238821029663086, "logps/rejected": -5.115423202514648, "loss": 0.6482, "nll_loss": 0.6216743588447571, "rewards/accuracies": 1.0, "rewards/chosen": -0.2323882281780243, "rewards/margins": 0.27915406227111816, "rewards/rejected": -0.5115423202514648, "step": 2854 }, { "epoch": 7.816563997262149, "grad_norm": 3.2031009197235107, "learning_rate": 6.08904109589041e-07, "log_odds_chosen": 3.48374080657959, "log_odds_ratio": -0.1568659543991089, "logits/chosen": 0.8133736252784729, "logits/rejected": 0.8201847672462463, "logps/chosen": -1.6577651500701904, "logps/rejected": -4.945972442626953, "loss": 0.6472, "nll_loss": 0.6315405964851379, "rewards/accuracies": 1.0, "rewards/chosen": -0.16577652096748352, "rewards/margins": 0.32882070541381836, "rewards/rejected": -0.4945971965789795, "step": 2855 }, { "epoch": 7.819301848049282, "grad_norm": 3.8688156604766846, "learning_rate": 6.087671232876712e-07, "log_odds_chosen": 1.5241073369979858, "log_odds_ratio": -0.28109049797058105, "logits/chosen": 0.8746778964996338, "logits/rejected": 0.8859856724739075, "logps/chosen": -2.8716940879821777, "logps/rejected": -4.288897514343262, "loss": 0.753, "nll_loss": 0.7249304056167603, "rewards/accuracies": 1.0, "rewards/chosen": -0.2871694266796112, "rewards/margins": 0.14172038435935974, "rewards/rejected": -0.42888981103897095, "step": 2856 }, { "epoch": 7.822039698836413, "grad_norm": 4.603054046630859, "learning_rate": 6.086301369863014e-07, "log_odds_chosen": 2.700547218322754, "log_odds_ratio": -0.2527693808078766, "logits/chosen": 0.7756322026252747, "logits/rejected": 0.6972391605377197, "logps/chosen": -2.212033748626709, "logps/rejected": -4.78823184967041, "loss": 0.7645, "nll_loss": 0.7392668724060059, "rewards/accuracies": 1.0, "rewards/chosen": -0.22120337188243866, "rewards/margins": 0.25761985778808594, "rewards/rejected": -0.4788232147693634, "step": 2857 }, { "epoch": 7.824777549623546, "grad_norm": 2.87654972076416, "learning_rate": 6.084931506849315e-07, "log_odds_chosen": 2.439666271209717, "log_odds_ratio": -0.1985703706741333, "logits/chosen": 0.7422387599945068, "logits/rejected": 0.700271725654602, "logps/chosen": -2.028258800506592, "logps/rejected": -4.330467224121094, "loss": 0.6802, "nll_loss": 0.6603862047195435, "rewards/accuracies": 1.0, "rewards/chosen": -0.2028258740901947, "rewards/margins": 0.23022088408470154, "rewards/rejected": -0.43304672837257385, "step": 2858 }, { "epoch": 7.827515400410678, "grad_norm": 4.950012683868408, "learning_rate": 6.083561643835616e-07, "log_odds_chosen": 2.5574326515197754, "log_odds_ratio": -0.30743056535720825, "logits/chosen": 0.9414687752723694, "logits/rejected": 0.9875423908233643, "logps/chosen": -2.5740389823913574, "logps/rejected": -5.072268486022949, "loss": 0.8179, "nll_loss": 0.7871589660644531, "rewards/accuracies": 0.75, "rewards/chosen": -0.2574039101600647, "rewards/margins": 0.2498229742050171, "rewards/rejected": -0.5072268843650818, "step": 2859 }, { "epoch": 7.83025325119781, "grad_norm": 3.9251842498779297, "learning_rate": 6.082191780821918e-07, "log_odds_chosen": 2.28619384765625, "log_odds_ratio": -0.25145423412323, "logits/chosen": 1.0294060707092285, "logits/rejected": 1.026450514793396, "logps/chosen": -2.364283561706543, "logps/rejected": -4.520901679992676, "loss": 0.6155, "nll_loss": 0.5903547406196594, "rewards/accuracies": 0.875, "rewards/chosen": -0.23642835021018982, "rewards/margins": 0.21566182374954224, "rewards/rejected": -0.45209017395973206, "step": 2860 }, { "epoch": 7.832991101984942, "grad_norm": 4.901433944702148, "learning_rate": 6.080821917808219e-07, "log_odds_chosen": 1.9248286485671997, "log_odds_ratio": -0.5042473673820496, "logits/chosen": 0.6419287919998169, "logits/rejected": 0.5546295642852783, "logps/chosen": -2.782548189163208, "logps/rejected": -4.577671527862549, "loss": 0.9163, "nll_loss": 0.8658866286277771, "rewards/accuracies": 0.875, "rewards/chosen": -0.27825483679771423, "rewards/margins": 0.17951232194900513, "rewards/rejected": -0.45776715874671936, "step": 2861 }, { "epoch": 7.835728952772074, "grad_norm": 3.4845364093780518, "learning_rate": 6.07945205479452e-07, "log_odds_chosen": 1.3591135740280151, "log_odds_ratio": -0.4696843922138214, "logits/chosen": 0.8352089524269104, "logits/rejected": 0.8022950887680054, "logps/chosen": -1.7910823822021484, "logps/rejected": -2.9236528873443604, "loss": 0.626, "nll_loss": 0.5789906978607178, "rewards/accuracies": 0.75, "rewards/chosen": -0.17910823225975037, "rewards/margins": 0.11325705051422119, "rewards/rejected": -0.29236528277397156, "step": 2862 }, { "epoch": 7.838466803559206, "grad_norm": 2.7491652965545654, "learning_rate": 6.078082191780821e-07, "log_odds_chosen": 2.0568645000457764, "log_odds_ratio": -0.2767447829246521, "logits/chosen": 0.7888755202293396, "logits/rejected": 0.7225293517112732, "logps/chosen": -1.2675248384475708, "logps/rejected": -3.1041159629821777, "loss": 0.6411, "nll_loss": 0.6134518384933472, "rewards/accuracies": 1.0, "rewards/chosen": -0.12675249576568604, "rewards/margins": 0.18365910649299622, "rewards/rejected": -0.31041160225868225, "step": 2863 }, { "epoch": 7.841204654346338, "grad_norm": 5.387051105499268, "learning_rate": 6.076712328767123e-07, "log_odds_chosen": 1.1878204345703125, "log_odds_ratio": -0.3724253177642822, "logits/chosen": 0.9027799963951111, "logits/rejected": 0.83510422706604, "logps/chosen": -2.3316383361816406, "logps/rejected": -3.434971332550049, "loss": 0.7129, "nll_loss": 0.675697922706604, "rewards/accuracies": 0.875, "rewards/chosen": -0.23316383361816406, "rewards/margins": 0.11033329367637634, "rewards/rejected": -0.3434971272945404, "step": 2864 }, { "epoch": 7.84394250513347, "grad_norm": 3.1141862869262695, "learning_rate": 6.075342465753425e-07, "log_odds_chosen": 3.7180936336517334, "log_odds_ratio": -0.07419272512197495, "logits/chosen": 1.163318157196045, "logits/rejected": 1.1818398237228394, "logps/chosen": -2.2408061027526855, "logps/rejected": -5.830400466918945, "loss": 0.626, "nll_loss": 0.6185977458953857, "rewards/accuracies": 1.0, "rewards/chosen": -0.2240806370973587, "rewards/margins": 0.3589593768119812, "rewards/rejected": -0.5830401182174683, "step": 2865 }, { "epoch": 7.846680355920602, "grad_norm": 4.113004684448242, "learning_rate": 6.073972602739725e-07, "log_odds_chosen": 1.4074511528015137, "log_odds_ratio": -0.4465767741203308, "logits/chosen": 0.7873775362968445, "logits/rejected": 0.7962397933006287, "logps/chosen": -2.301229238510132, "logps/rejected": -3.564958333969116, "loss": 0.713, "nll_loss": 0.6683030724525452, "rewards/accuracies": 0.75, "rewards/chosen": -0.23012295365333557, "rewards/margins": 0.12637290358543396, "rewards/rejected": -0.35649585723876953, "step": 2866 }, { "epoch": 7.8494182067077345, "grad_norm": 3.466200351715088, "learning_rate": 6.072602739726027e-07, "log_odds_chosen": 2.4622716903686523, "log_odds_ratio": -0.3489583432674408, "logits/chosen": 0.9578691124916077, "logits/rejected": 0.9119997620582581, "logps/chosen": -1.8437209129333496, "logps/rejected": -4.062657356262207, "loss": 0.6394, "nll_loss": 0.6044678688049316, "rewards/accuracies": 0.875, "rewards/chosen": -0.18437209725379944, "rewards/margins": 0.22189363837242126, "rewards/rejected": -0.4062657356262207, "step": 2867 }, { "epoch": 7.852156057494867, "grad_norm": 3.381561517715454, "learning_rate": 6.071232876712329e-07, "log_odds_chosen": 2.0387797355651855, "log_odds_ratio": -0.18624088168144226, "logits/chosen": 0.8078681826591492, "logits/rejected": 0.80656498670578, "logps/chosen": -2.0221128463745117, "logps/rejected": -3.9273841381073, "loss": 0.7169, "nll_loss": 0.6983172297477722, "rewards/accuracies": 1.0, "rewards/chosen": -0.20221126079559326, "rewards/margins": 0.19052714109420776, "rewards/rejected": -0.392738401889801, "step": 2868 }, { "epoch": 7.854893908281999, "grad_norm": 4.098496913909912, "learning_rate": 6.069863013698629e-07, "log_odds_chosen": 1.2761625051498413, "log_odds_ratio": -0.48740822076797485, "logits/chosen": 0.6376339793205261, "logits/rejected": 0.5238205790519714, "logps/chosen": -1.8428077697753906, "logps/rejected": -2.957543134689331, "loss": 0.6852, "nll_loss": 0.6364740133285522, "rewards/accuracies": 0.875, "rewards/chosen": -0.18428076803684235, "rewards/margins": 0.11147354543209076, "rewards/rejected": -0.2957543134689331, "step": 2869 }, { "epoch": 7.857631759069131, "grad_norm": 3.413245677947998, "learning_rate": 6.068493150684931e-07, "log_odds_chosen": 1.6923727989196777, "log_odds_ratio": -0.3600825369358063, "logits/chosen": 0.8067821860313416, "logits/rejected": 0.767730712890625, "logps/chosen": -2.3373117446899414, "logps/rejected": -3.9416019916534424, "loss": 0.7897, "nll_loss": 0.7537158727645874, "rewards/accuracies": 0.875, "rewards/chosen": -0.23373118042945862, "rewards/margins": 0.16042901575565338, "rewards/rejected": -0.3941602110862732, "step": 2870 }, { "epoch": 7.860369609856263, "grad_norm": 4.248871326446533, "learning_rate": 6.067123287671233e-07, "log_odds_chosen": 2.1041934490203857, "log_odds_ratio": -0.16112461686134338, "logits/chosen": 0.8731681108474731, "logits/rejected": 0.8853315114974976, "logps/chosen": -2.794463872909546, "logps/rejected": -4.774433135986328, "loss": 0.7105, "nll_loss": 0.6943698525428772, "rewards/accuracies": 1.0, "rewards/chosen": -0.27944639325141907, "rewards/margins": 0.19799691438674927, "rewards/rejected": -0.4774433374404907, "step": 2871 }, { "epoch": 7.863107460643395, "grad_norm": 3.0832417011260986, "learning_rate": 6.065753424657534e-07, "log_odds_chosen": 4.248832702636719, "log_odds_ratio": -0.07901415228843689, "logits/chosen": 0.943285346031189, "logits/rejected": 0.9504486322402954, "logps/chosen": -2.2134737968444824, "logps/rejected": -6.313448429107666, "loss": 0.6838, "nll_loss": 0.6758893728256226, "rewards/accuracies": 1.0, "rewards/chosen": -0.221347376704216, "rewards/margins": 0.40999749302864075, "rewards/rejected": -0.6313448548316956, "step": 2872 }, { "epoch": 7.865845311430527, "grad_norm": 3.266958475112915, "learning_rate": 6.064383561643835e-07, "log_odds_chosen": 2.360546588897705, "log_odds_ratio": -0.20169664919376373, "logits/chosen": 0.5373361706733704, "logits/rejected": 0.5367610454559326, "logps/chosen": -2.0555667877197266, "logps/rejected": -4.202300071716309, "loss": 0.6794, "nll_loss": 0.6591871976852417, "rewards/accuracies": 0.875, "rewards/chosen": -0.2055566906929016, "rewards/margins": 0.21467334032058716, "rewards/rejected": -0.42023003101348877, "step": 2873 }, { "epoch": 7.868583162217659, "grad_norm": 2.964874267578125, "learning_rate": 6.063013698630137e-07, "log_odds_chosen": 1.658612608909607, "log_odds_ratio": -0.2686915695667267, "logits/chosen": 0.9497219324111938, "logits/rejected": 0.953681230545044, "logps/chosen": -2.458242177963257, "logps/rejected": -4.026889324188232, "loss": 0.6624, "nll_loss": 0.6355533599853516, "rewards/accuracies": 1.0, "rewards/chosen": -0.24582423269748688, "rewards/margins": 0.1568647027015686, "rewards/rejected": -0.4026889204978943, "step": 2874 }, { "epoch": 7.871321013004791, "grad_norm": 3.205491542816162, "learning_rate": 6.061643835616438e-07, "log_odds_chosen": 3.594761610031128, "log_odds_ratio": -0.1504349708557129, "logits/chosen": 1.0071769952774048, "logits/rejected": 0.996444046497345, "logps/chosen": -1.6079798936843872, "logps/rejected": -4.939761161804199, "loss": 0.6036, "nll_loss": 0.5885812044143677, "rewards/accuracies": 1.0, "rewards/chosen": -0.16079799830913544, "rewards/margins": 0.3331781327724457, "rewards/rejected": -0.4939761459827423, "step": 2875 }, { "epoch": 7.874058863791923, "grad_norm": 5.15969181060791, "learning_rate": 6.060273972602739e-07, "log_odds_chosen": 2.500485897064209, "log_odds_ratio": -0.2671137750148773, "logits/chosen": 0.9311360120773315, "logits/rejected": 0.9479526281356812, "logps/chosen": -2.6364500522613525, "logps/rejected": -5.011280059814453, "loss": 0.7296, "nll_loss": 0.7029334902763367, "rewards/accuracies": 0.875, "rewards/chosen": -0.2636449933052063, "rewards/margins": 0.23748302459716797, "rewards/rejected": -0.501128077507019, "step": 2876 }, { "epoch": 7.876796714579055, "grad_norm": 3.6512646675109863, "learning_rate": 6.05890410958904e-07, "log_odds_chosen": 3.0981202125549316, "log_odds_ratio": -0.26417937874794006, "logits/chosen": 0.8048371076583862, "logits/rejected": 0.8759989738464355, "logps/chosen": -2.3802387714385986, "logps/rejected": -5.363738536834717, "loss": 0.6211, "nll_loss": 0.5946803092956543, "rewards/accuracies": 0.875, "rewards/chosen": -0.23802387714385986, "rewards/margins": 0.29834994673728943, "rewards/rejected": -0.5363738536834717, "step": 2877 }, { "epoch": 7.879534565366187, "grad_norm": 2.956681251525879, "learning_rate": 6.057534246575342e-07, "log_odds_chosen": 3.5018155574798584, "log_odds_ratio": -0.1756127029657364, "logits/chosen": 0.7040936350822449, "logits/rejected": 0.6460503935813904, "logps/chosen": -1.5748695135116577, "logps/rejected": -4.869735240936279, "loss": 0.6649, "nll_loss": 0.6472991704940796, "rewards/accuracies": 1.0, "rewards/chosen": -0.1574869453907013, "rewards/margins": 0.329486608505249, "rewards/rejected": -0.4869735538959503, "step": 2878 }, { "epoch": 7.882272416153319, "grad_norm": 3.4328835010528564, "learning_rate": 6.056164383561644e-07, "log_odds_chosen": 2.654366970062256, "log_odds_ratio": -0.23401488363742828, "logits/chosen": 0.7914688587188721, "logits/rejected": 0.7890704870223999, "logps/chosen": -2.687626361846924, "logps/rejected": -5.240838527679443, "loss": 0.7147, "nll_loss": 0.6913068294525146, "rewards/accuracies": 1.0, "rewards/chosen": -0.26876264810562134, "rewards/margins": 0.255321204662323, "rewards/rejected": -0.5240838527679443, "step": 2879 }, { "epoch": 7.885010266940451, "grad_norm": 6.378763675689697, "learning_rate": 6.054794520547944e-07, "log_odds_chosen": 1.7687618732452393, "log_odds_ratio": -0.4910648465156555, "logits/chosen": 1.1112282276153564, "logits/rejected": 1.0979175567626953, "logps/chosen": -2.1797451972961426, "logps/rejected": -3.8203816413879395, "loss": 0.7319, "nll_loss": 0.6828000545501709, "rewards/accuracies": 0.875, "rewards/chosen": -0.21797451376914978, "rewards/margins": 0.16406366229057312, "rewards/rejected": -0.3820381760597229, "step": 2880 }, { "epoch": 7.887748117727584, "grad_norm": 2.87894606590271, "learning_rate": 6.053424657534246e-07, "log_odds_chosen": 2.143332004547119, "log_odds_ratio": -0.252066433429718, "logits/chosen": 1.011944055557251, "logits/rejected": 1.0491139888763428, "logps/chosen": -1.9215244054794312, "logps/rejected": -3.8565380573272705, "loss": 0.6802, "nll_loss": 0.654973566532135, "rewards/accuracies": 0.875, "rewards/chosen": -0.1921524554491043, "rewards/margins": 0.19350136816501617, "rewards/rejected": -0.3856537938117981, "step": 2881 }, { "epoch": 7.8904859685147155, "grad_norm": 2.618915319442749, "learning_rate": 6.052054794520548e-07, "log_odds_chosen": 2.5224721431732178, "log_odds_ratio": -0.24902895092964172, "logits/chosen": 1.084350824356079, "logits/rejected": 1.0774030685424805, "logps/chosen": -1.7402360439300537, "logps/rejected": -4.105537414550781, "loss": 0.5943, "nll_loss": 0.5694227814674377, "rewards/accuracies": 0.875, "rewards/chosen": -0.1740235984325409, "rewards/margins": 0.2365301549434662, "rewards/rejected": -0.4105537533760071, "step": 2882 }, { "epoch": 7.8932238193018485, "grad_norm": 3.2497851848602295, "learning_rate": 6.050684931506848e-07, "log_odds_chosen": 3.8136844635009766, "log_odds_ratio": -0.06341519951820374, "logits/chosen": 0.8353844881057739, "logits/rejected": 0.8595982789993286, "logps/chosen": -1.6456820964813232, "logps/rejected": -5.078091621398926, "loss": 0.6955, "nll_loss": 0.6891829371452332, "rewards/accuracies": 1.0, "rewards/chosen": -0.1645682156085968, "rewards/margins": 0.3432409167289734, "rewards/rejected": -0.5078091621398926, "step": 2883 }, { "epoch": 7.8959616700889805, "grad_norm": 3.797102212905884, "learning_rate": 6.04931506849315e-07, "log_odds_chosen": 2.2583162784576416, "log_odds_ratio": -0.26890310645103455, "logits/chosen": 0.8655596971511841, "logits/rejected": 0.8910077810287476, "logps/chosen": -1.7427983283996582, "logps/rejected": -3.7823197841644287, "loss": 0.6148, "nll_loss": 0.5879426598548889, "rewards/accuracies": 1.0, "rewards/chosen": -0.1742798388004303, "rewards/margins": 0.2039521336555481, "rewards/rejected": -0.3782319724559784, "step": 2884 }, { "epoch": 7.898699520876113, "grad_norm": 4.990521430969238, "learning_rate": 6.047945205479452e-07, "log_odds_chosen": 2.8941287994384766, "log_odds_ratio": -0.3647102415561676, "logits/chosen": 1.11733877658844, "logits/rejected": 1.1446999311447144, "logps/chosen": -3.084299325942993, "logps/rejected": -5.896239280700684, "loss": 0.6686, "nll_loss": 0.6321070194244385, "rewards/accuracies": 0.875, "rewards/chosen": -0.30842992663383484, "rewards/margins": 0.28119397163391113, "rewards/rejected": -0.5896239280700684, "step": 2885 }, { "epoch": 7.901437371663245, "grad_norm": 3.2202436923980713, "learning_rate": 6.046575342465753e-07, "log_odds_chosen": 2.1980032920837402, "log_odds_ratio": -0.2712899148464203, "logits/chosen": 0.674970805644989, "logits/rejected": 0.6658318638801575, "logps/chosen": -2.0611627101898193, "logps/rejected": -4.118377685546875, "loss": 0.6235, "nll_loss": 0.5963535904884338, "rewards/accuracies": 0.875, "rewards/chosen": -0.20611628890037537, "rewards/margins": 0.20572152733802795, "rewards/rejected": -0.41183778643608093, "step": 2886 }, { "epoch": 7.904175222450377, "grad_norm": 5.2227373123168945, "learning_rate": 6.045205479452054e-07, "log_odds_chosen": 1.24961519241333, "log_odds_ratio": -0.6265733242034912, "logits/chosen": 0.8644239902496338, "logits/rejected": 0.8093688488006592, "logps/chosen": -2.300867795944214, "logps/rejected": -3.4544739723205566, "loss": 0.7365, "nll_loss": 0.6737982034683228, "rewards/accuracies": 0.75, "rewards/chosen": -0.2300868034362793, "rewards/margins": 0.11536061763763428, "rewards/rejected": -0.3454473912715912, "step": 2887 }, { "epoch": 7.906913073237509, "grad_norm": 2.9267983436584473, "learning_rate": 6.043835616438356e-07, "log_odds_chosen": 1.6296733617782593, "log_odds_ratio": -0.3305114507675171, "logits/chosen": 1.0008962154388428, "logits/rejected": 1.0087764263153076, "logps/chosen": -1.9346836805343628, "logps/rejected": -3.469906806945801, "loss": 0.7293, "nll_loss": 0.6962476968765259, "rewards/accuracies": 1.0, "rewards/chosen": -0.1934683620929718, "rewards/margins": 0.1535223126411438, "rewards/rejected": -0.3469906747341156, "step": 2888 }, { "epoch": 7.909650924024641, "grad_norm": 3.690948009490967, "learning_rate": 6.042465753424657e-07, "log_odds_chosen": 1.9708648920059204, "log_odds_ratio": -0.27351799607276917, "logits/chosen": 0.8000775575637817, "logits/rejected": 0.7739309668540955, "logps/chosen": -1.7292710542678833, "logps/rejected": -3.5491714477539062, "loss": 0.7062, "nll_loss": 0.6788726449012756, "rewards/accuracies": 0.75, "rewards/chosen": -0.1729271113872528, "rewards/margins": 0.18199004232883453, "rewards/rejected": -0.35491716861724854, "step": 2889 }, { "epoch": 7.912388774811773, "grad_norm": 3.528130054473877, "learning_rate": 6.041095890410958e-07, "log_odds_chosen": 1.3494274616241455, "log_odds_ratio": -0.5882933139801025, "logits/chosen": 0.8817695379257202, "logits/rejected": 0.8473203778266907, "logps/chosen": -2.040890693664551, "logps/rejected": -3.2911949157714844, "loss": 0.7091, "nll_loss": 0.650309145450592, "rewards/accuracies": 0.75, "rewards/chosen": -0.20408906042575836, "rewards/margins": 0.12503042817115784, "rewards/rejected": -0.3291195034980774, "step": 2890 }, { "epoch": 7.915126625598905, "grad_norm": 3.0495738983154297, "learning_rate": 6.039726027397259e-07, "log_odds_chosen": 3.319976568222046, "log_odds_ratio": -0.21509581804275513, "logits/chosen": 1.0255297422409058, "logits/rejected": 0.9987292885780334, "logps/chosen": -2.2056491374969482, "logps/rejected": -5.397188663482666, "loss": 0.7048, "nll_loss": 0.6833163499832153, "rewards/accuracies": 0.875, "rewards/chosen": -0.22056493163108826, "rewards/margins": 0.31915396451950073, "rewards/rejected": -0.5397188663482666, "step": 2891 }, { "epoch": 7.917864476386037, "grad_norm": 3.181938648223877, "learning_rate": 6.038356164383561e-07, "log_odds_chosen": 2.251018762588501, "log_odds_ratio": -0.29104477167129517, "logits/chosen": 0.5783973932266235, "logits/rejected": 0.5087569952011108, "logps/chosen": -1.9069056510925293, "logps/rejected": -4.032797813415527, "loss": 0.6458, "nll_loss": 0.6166635155677795, "rewards/accuracies": 0.875, "rewards/chosen": -0.1906905472278595, "rewards/margins": 0.21258921921253204, "rewards/rejected": -0.40327978134155273, "step": 2892 }, { "epoch": 7.920602327173169, "grad_norm": 2.8936703205108643, "learning_rate": 6.036986301369863e-07, "log_odds_chosen": 1.4630008935928345, "log_odds_ratio": -0.2388736605644226, "logits/chosen": 0.5944448709487915, "logits/rejected": 0.5705995559692383, "logps/chosen": -1.9061830043792725, "logps/rejected": -3.2439136505126953, "loss": 0.7651, "nll_loss": 0.7411817908287048, "rewards/accuracies": 1.0, "rewards/chosen": -0.19061830639839172, "rewards/margins": 0.1337730586528778, "rewards/rejected": -0.32439136505126953, "step": 2893 }, { "epoch": 7.923340177960301, "grad_norm": 3.8409981727600098, "learning_rate": 6.035616438356163e-07, "log_odds_chosen": 1.7222263813018799, "log_odds_ratio": -0.4055112600326538, "logits/chosen": 0.6943013668060303, "logits/rejected": 0.6440883874893188, "logps/chosen": -2.2528152465820312, "logps/rejected": -3.8197884559631348, "loss": 0.7593, "nll_loss": 0.7187245488166809, "rewards/accuracies": 0.875, "rewards/chosen": -0.22528153657913208, "rewards/margins": 0.15669727325439453, "rewards/rejected": -0.3819788098335266, "step": 2894 }, { "epoch": 7.926078028747433, "grad_norm": 2.944854736328125, "learning_rate": 6.034246575342465e-07, "log_odds_chosen": 2.3679325580596924, "log_odds_ratio": -0.2294488251209259, "logits/chosen": 0.6205573678016663, "logits/rejected": 0.5571010112762451, "logps/chosen": -1.9351476430892944, "logps/rejected": -4.155357360839844, "loss": 0.6222, "nll_loss": 0.5992646217346191, "rewards/accuracies": 1.0, "rewards/chosen": -0.19351476430892944, "rewards/margins": 0.2220209538936615, "rewards/rejected": -0.41553574800491333, "step": 2895 }, { "epoch": 7.928815879534565, "grad_norm": 3.0926902294158936, "learning_rate": 6.032876712328767e-07, "log_odds_chosen": 3.5556278228759766, "log_odds_ratio": -0.16680382192134857, "logits/chosen": 0.9098974466323853, "logits/rejected": 0.949421763420105, "logps/chosen": -1.8288140296936035, "logps/rejected": -5.191877365112305, "loss": 0.7537, "nll_loss": 0.7370355725288391, "rewards/accuracies": 1.0, "rewards/chosen": -0.18288138508796692, "rewards/margins": 0.3363063931465149, "rewards/rejected": -0.5191878080368042, "step": 2896 }, { "epoch": 7.931553730321697, "grad_norm": 4.23004150390625, "learning_rate": 6.031506849315067e-07, "log_odds_chosen": 1.805641770362854, "log_odds_ratio": -0.46302831172943115, "logits/chosen": 0.9812821745872498, "logits/rejected": 1.0008689165115356, "logps/chosen": -2.9720633029937744, "logps/rejected": -4.688131809234619, "loss": 0.7114, "nll_loss": 0.665101170539856, "rewards/accuracies": 0.875, "rewards/chosen": -0.297206312417984, "rewards/margins": 0.1716068834066391, "rewards/rejected": -0.4688131809234619, "step": 2897 }, { "epoch": 7.9342915811088295, "grad_norm": 4.6439595222473145, "learning_rate": 6.030136986301369e-07, "log_odds_chosen": 1.430053472518921, "log_odds_ratio": -0.4791349768638611, "logits/chosen": 0.8540236353874207, "logits/rejected": 0.8469633460044861, "logps/chosen": -1.8725013732910156, "logps/rejected": -3.0912654399871826, "loss": 0.6893, "nll_loss": 0.6413888931274414, "rewards/accuracies": 0.75, "rewards/chosen": -0.18725015223026276, "rewards/margins": 0.12187637388706207, "rewards/rejected": -0.30912652611732483, "step": 2898 }, { "epoch": 7.9370294318959616, "grad_norm": 4.4303154945373535, "learning_rate": 6.028767123287671e-07, "log_odds_chosen": 1.5933234691619873, "log_odds_ratio": -0.46150094270706177, "logits/chosen": 0.8974317312240601, "logits/rejected": 0.8323072195053101, "logps/chosen": -2.578260898590088, "logps/rejected": -4.099762916564941, "loss": 0.8466, "nll_loss": 0.8004474639892578, "rewards/accuracies": 0.875, "rewards/chosen": -0.2578260898590088, "rewards/margins": 0.1521502137184143, "rewards/rejected": -0.4099763035774231, "step": 2899 }, { "epoch": 7.939767282683094, "grad_norm": 3.7301764488220215, "learning_rate": 6.027397260273972e-07, "log_odds_chosen": 3.3013243675231934, "log_odds_ratio": -0.238956481218338, "logits/chosen": 1.1176685094833374, "logits/rejected": 1.085233449935913, "logps/chosen": -2.4329967498779297, "logps/rejected": -5.601640224456787, "loss": 0.6901, "nll_loss": 0.6662493944168091, "rewards/accuracies": 1.0, "rewards/chosen": -0.2432996928691864, "rewards/margins": 0.31686437129974365, "rewards/rejected": -0.5601640343666077, "step": 2900 }, { "epoch": 7.942505133470226, "grad_norm": 2.9553160667419434, "learning_rate": 6.026027397260273e-07, "log_odds_chosen": 2.1580252647399902, "log_odds_ratio": -0.21120832860469818, "logits/chosen": 0.7360737323760986, "logits/rejected": 0.7344694137573242, "logps/chosen": -1.9724085330963135, "logps/rejected": -3.967832088470459, "loss": 0.679, "nll_loss": 0.6578295826911926, "rewards/accuracies": 1.0, "rewards/chosen": -0.19724084436893463, "rewards/margins": 0.19954237341880798, "rewards/rejected": -0.3967832326889038, "step": 2901 }, { "epoch": 7.945242984257358, "grad_norm": 4.088716983795166, "learning_rate": 6.024657534246576e-07, "log_odds_chosen": 2.5067334175109863, "log_odds_ratio": -0.3526081442832947, "logits/chosen": 0.7057244777679443, "logits/rejected": 0.6791978478431702, "logps/chosen": -2.2074387073516846, "logps/rejected": -4.599355697631836, "loss": 0.8081, "nll_loss": 0.772816002368927, "rewards/accuracies": 0.875, "rewards/chosen": -0.22074387967586517, "rewards/margins": 0.23919174075126648, "rewards/rejected": -0.45993560552597046, "step": 2902 }, { "epoch": 7.94798083504449, "grad_norm": 3.0235159397125244, "learning_rate": 6.023287671232877e-07, "log_odds_chosen": 2.5526037216186523, "log_odds_ratio": -0.14454078674316406, "logits/chosen": 0.8047603368759155, "logits/rejected": 0.7328313589096069, "logps/chosen": -1.5585863590240479, "logps/rejected": -3.8777480125427246, "loss": 0.6048, "nll_loss": 0.5903253555297852, "rewards/accuracies": 1.0, "rewards/chosen": -0.15585863590240479, "rewards/margins": 0.2319161593914032, "rewards/rejected": -0.387774795293808, "step": 2903 }, { "epoch": 7.950718685831622, "grad_norm": 7.117213726043701, "learning_rate": 6.021917808219178e-07, "log_odds_chosen": 1.8724981546401978, "log_odds_ratio": -0.3839356005191803, "logits/chosen": 1.1160204410552979, "logits/rejected": 1.1219501495361328, "logps/chosen": -2.5626533031463623, "logps/rejected": -4.3219757080078125, "loss": 0.6992, "nll_loss": 0.6608386635780334, "rewards/accuracies": 0.875, "rewards/chosen": -0.2562653124332428, "rewards/margins": 0.17593222856521606, "rewards/rejected": -0.43219754099845886, "step": 2904 }, { "epoch": 7.953456536618754, "grad_norm": 6.936497688293457, "learning_rate": 6.02054794520548e-07, "log_odds_chosen": 1.925616979598999, "log_odds_ratio": -0.7665530443191528, "logits/chosen": 0.9236340522766113, "logits/rejected": 0.934948742389679, "logps/chosen": -3.012111186981201, "logps/rejected": -4.859877586364746, "loss": 0.784, "nll_loss": 0.7073047161102295, "rewards/accuracies": 0.75, "rewards/chosen": -0.3012111186981201, "rewards/margins": 0.1847766637802124, "rewards/rejected": -0.4859878122806549, "step": 2905 }, { "epoch": 7.956194387405886, "grad_norm": 3.695462226867676, "learning_rate": 6.019178082191781e-07, "log_odds_chosen": 0.7618157863616943, "log_odds_ratio": -0.4401347041130066, "logits/chosen": 1.0715200901031494, "logits/rejected": 1.0419843196868896, "logps/chosen": -1.9545812606811523, "logps/rejected": -2.625645637512207, "loss": 0.6445, "nll_loss": 0.6004739999771118, "rewards/accuracies": 0.875, "rewards/chosen": -0.19545814394950867, "rewards/margins": 0.06710643321275711, "rewards/rejected": -0.2625645697116852, "step": 2906 }, { "epoch": 7.958932238193018, "grad_norm": 6.597378253936768, "learning_rate": 6.017808219178083e-07, "log_odds_chosen": 1.914048671722412, "log_odds_ratio": -0.3388201594352722, "logits/chosen": 0.8847904205322266, "logits/rejected": 0.8747901916503906, "logps/chosen": -2.8704802989959717, "logps/rejected": -4.651535987854004, "loss": 0.7137, "nll_loss": 0.6798405647277832, "rewards/accuracies": 0.875, "rewards/chosen": -0.2870480418205261, "rewards/margins": 0.17810556292533875, "rewards/rejected": -0.46515360474586487, "step": 2907 }, { "epoch": 7.961670088980151, "grad_norm": 3.161435127258301, "learning_rate": 6.016438356164383e-07, "log_odds_chosen": 2.8120763301849365, "log_odds_ratio": -0.1455373466014862, "logits/chosen": 0.931908130645752, "logits/rejected": 0.9488134384155273, "logps/chosen": -2.524801731109619, "logps/rejected": -5.233863830566406, "loss": 0.6344, "nll_loss": 0.6198270916938782, "rewards/accuracies": 1.0, "rewards/chosen": -0.252480149269104, "rewards/margins": 0.2709062397480011, "rewards/rejected": -0.5233863592147827, "step": 2908 }, { "epoch": 7.964407939767282, "grad_norm": 3.212522268295288, "learning_rate": 6.015068493150685e-07, "log_odds_chosen": 1.5569413900375366, "log_odds_ratio": -0.3974376320838928, "logits/chosen": 0.9881618618965149, "logits/rejected": 1.007253885269165, "logps/chosen": -2.2417402267456055, "logps/rejected": -3.7598838806152344, "loss": 0.6775, "nll_loss": 0.6377081871032715, "rewards/accuracies": 0.75, "rewards/chosen": -0.22417402267456055, "rewards/margins": 0.15181437134742737, "rewards/rejected": -0.3759884238243103, "step": 2909 }, { "epoch": 7.967145790554415, "grad_norm": 6.162457466125488, "learning_rate": 6.013698630136987e-07, "log_odds_chosen": 1.735058069229126, "log_odds_ratio": -0.27148014307022095, "logits/chosen": 0.8234509825706482, "logits/rejected": 0.7435179948806763, "logps/chosen": -1.784156322479248, "logps/rejected": -3.2726640701293945, "loss": 0.658, "nll_loss": 0.6308592557907104, "rewards/accuracies": 0.875, "rewards/chosen": -0.17841562628746033, "rewards/margins": 0.14885079860687256, "rewards/rejected": -0.3272664248943329, "step": 2910 }, { "epoch": 7.969883641341547, "grad_norm": 5.1987996101379395, "learning_rate": 6.012328767123287e-07, "log_odds_chosen": 1.7852914333343506, "log_odds_ratio": -0.5722974538803101, "logits/chosen": 1.1300289630889893, "logits/rejected": 1.086915135383606, "logps/chosen": -2.045336961746216, "logps/rejected": -3.650214195251465, "loss": 0.6854, "nll_loss": 0.628204882144928, "rewards/accuracies": 0.875, "rewards/chosen": -0.20453369617462158, "rewards/margins": 0.16048772633075714, "rewards/rejected": -0.3650214374065399, "step": 2911 }, { "epoch": 7.972621492128679, "grad_norm": 3.166280746459961, "learning_rate": 6.010958904109589e-07, "log_odds_chosen": 2.0950450897216797, "log_odds_ratio": -0.26136061549186707, "logits/chosen": 0.5840528011322021, "logits/rejected": 0.5455935597419739, "logps/chosen": -1.4582674503326416, "logps/rejected": -3.331073522567749, "loss": 0.6626, "nll_loss": 0.6364372968673706, "rewards/accuracies": 1.0, "rewards/chosen": -0.14582675695419312, "rewards/margins": 0.1872805953025818, "rewards/rejected": -0.3331073820590973, "step": 2912 }, { "epoch": 7.975359342915811, "grad_norm": 3.428004741668701, "learning_rate": 6.009589041095891e-07, "log_odds_chosen": 1.9252970218658447, "log_odds_ratio": -0.30241772532463074, "logits/chosen": 0.7379302978515625, "logits/rejected": 0.676440954208374, "logps/chosen": -1.2437632083892822, "logps/rejected": -2.928647994995117, "loss": 0.6353, "nll_loss": 0.6050708889961243, "rewards/accuracies": 1.0, "rewards/chosen": -0.1243763267993927, "rewards/margins": 0.16848847270011902, "rewards/rejected": -0.2928647994995117, "step": 2913 }, { "epoch": 7.9780971937029435, "grad_norm": 3.4033336639404297, "learning_rate": 6.008219178082192e-07, "log_odds_chosen": 3.033892869949341, "log_odds_ratio": -0.21927867829799652, "logits/chosen": 0.8149152994155884, "logits/rejected": 0.819538414478302, "logps/chosen": -1.9553955793380737, "logps/rejected": -4.816753387451172, "loss": 0.689, "nll_loss": 0.6670407652854919, "rewards/accuracies": 0.875, "rewards/chosen": -0.19553956389427185, "rewards/margins": 0.28613579273223877, "rewards/rejected": -0.48167532682418823, "step": 2914 }, { "epoch": 7.9808350444900755, "grad_norm": 2.9773929119110107, "learning_rate": 6.006849315068493e-07, "log_odds_chosen": 3.1103270053863525, "log_odds_ratio": -0.1755635142326355, "logits/chosen": 0.8441024422645569, "logits/rejected": 0.7885292768478394, "logps/chosen": -2.0850281715393066, "logps/rejected": -5.064423084259033, "loss": 0.6838, "nll_loss": 0.6662187576293945, "rewards/accuracies": 1.0, "rewards/chosen": -0.20850281417369843, "rewards/margins": 0.2979395091533661, "rewards/rejected": -0.5064423084259033, "step": 2915 }, { "epoch": 7.983572895277208, "grad_norm": 3.283658742904663, "learning_rate": 6.005479452054795e-07, "log_odds_chosen": 1.4378188848495483, "log_odds_ratio": -0.35018181800842285, "logits/chosen": 0.6536753177642822, "logits/rejected": 0.7098881006240845, "logps/chosen": -1.925234079360962, "logps/rejected": -3.254427909851074, "loss": 0.7287, "nll_loss": 0.6936940550804138, "rewards/accuracies": 1.0, "rewards/chosen": -0.19252340495586395, "rewards/margins": 0.13291941583156586, "rewards/rejected": -0.3254428207874298, "step": 2916 }, { "epoch": 7.98631074606434, "grad_norm": 4.023990154266357, "learning_rate": 6.004109589041096e-07, "log_odds_chosen": 1.830739974975586, "log_odds_ratio": -0.24849382042884827, "logits/chosen": 0.9806137681007385, "logits/rejected": 0.9283751845359802, "logps/chosen": -1.8543686866760254, "logps/rejected": -3.431295156478882, "loss": 0.6256, "nll_loss": 0.6007997989654541, "rewards/accuracies": 1.0, "rewards/chosen": -0.18543687462806702, "rewards/margins": 0.15769264101982117, "rewards/rejected": -0.3431295156478882, "step": 2917 }, { "epoch": 7.989048596851472, "grad_norm": 3.4009134769439697, "learning_rate": 6.002739726027397e-07, "log_odds_chosen": 3.1619298458099365, "log_odds_ratio": -0.14067570865154266, "logits/chosen": 0.9830838441848755, "logits/rejected": 0.9409238696098328, "logps/chosen": -1.6946518421173096, "logps/rejected": -4.64818000793457, "loss": 0.6482, "nll_loss": 0.6341253519058228, "rewards/accuracies": 1.0, "rewards/chosen": -0.16946518421173096, "rewards/margins": 0.29535287618637085, "rewards/rejected": -0.46481800079345703, "step": 2918 }, { "epoch": 7.991786447638604, "grad_norm": 7.436766624450684, "learning_rate": 6.001369863013699e-07, "log_odds_chosen": 2.5745837688446045, "log_odds_ratio": -0.7067461609840393, "logits/chosen": 0.8058642148971558, "logits/rejected": 0.7794808745384216, "logps/chosen": -2.5026702880859375, "logps/rejected": -4.8719563484191895, "loss": 0.7136, "nll_loss": 0.6429292559623718, "rewards/accuracies": 0.875, "rewards/chosen": -0.25026702880859375, "rewards/margins": 0.23692864179611206, "rewards/rejected": -0.4871956408023834, "step": 2919 }, { "epoch": 7.994524298425736, "grad_norm": 3.4397451877593994, "learning_rate": 6e-07, "log_odds_chosen": 1.5434741973876953, "log_odds_ratio": -0.376184344291687, "logits/chosen": 0.8528531789779663, "logits/rejected": 0.8595523238182068, "logps/chosen": -2.1436867713928223, "logps/rejected": -3.6340219974517822, "loss": 0.6774, "nll_loss": 0.6398143768310547, "rewards/accuracies": 0.75, "rewards/chosen": -0.21436870098114014, "rewards/margins": 0.14903351664543152, "rewards/rejected": -0.36340221762657166, "step": 2920 }, { "epoch": 7.997262149212868, "grad_norm": 4.89866304397583, "learning_rate": 5.998630136986302e-07, "log_odds_chosen": 1.66020667552948, "log_odds_ratio": -0.34699365496635437, "logits/chosen": 0.7152718305587769, "logits/rejected": 0.6253539323806763, "logps/chosen": -1.867152214050293, "logps/rejected": -3.387657642364502, "loss": 0.6511, "nll_loss": 0.6164160966873169, "rewards/accuracies": 0.875, "rewards/chosen": -0.186715230345726, "rewards/margins": 0.15205053985118866, "rewards/rejected": -0.3387657701969147, "step": 2921 }, { "epoch": 8.0, "grad_norm": 3.404343843460083, "learning_rate": 5.997260273972602e-07, "log_odds_chosen": 1.7401779890060425, "log_odds_ratio": -0.2845899164676666, "logits/chosen": 0.8373138308525085, "logits/rejected": 0.8040345907211304, "logps/chosen": -1.590277910232544, "logps/rejected": -3.1559815406799316, "loss": 0.5905, "nll_loss": 0.5620824098587036, "rewards/accuracies": 0.875, "rewards/chosen": -0.1590277999639511, "rewards/margins": 0.15657036006450653, "rewards/rejected": -0.31559813022613525, "step": 2922 }, { "epoch": 8.002737850787133, "grad_norm": 6.110729217529297, "learning_rate": 5.995890410958904e-07, "log_odds_chosen": 1.423417329788208, "log_odds_ratio": -0.47896769642829895, "logits/chosen": 0.9461418390274048, "logits/rejected": 0.9467142224311829, "logps/chosen": -2.06546950340271, "logps/rejected": -3.333559036254883, "loss": 0.6433, "nll_loss": 0.5953558683395386, "rewards/accuracies": 0.875, "rewards/chosen": -0.20654696226119995, "rewards/margins": 0.12680897116661072, "rewards/rejected": -0.3333559036254883, "step": 2923 }, { "epoch": 8.005475701574264, "grad_norm": 3.319244384765625, "learning_rate": 5.994520547945206e-07, "log_odds_chosen": 1.8759371042251587, "log_odds_ratio": -0.20159554481506348, "logits/chosen": 0.8757908940315247, "logits/rejected": 0.8745207786560059, "logps/chosen": -1.4298089742660522, "logps/rejected": -3.0886447429656982, "loss": 0.6102, "nll_loss": 0.5900630950927734, "rewards/accuracies": 1.0, "rewards/chosen": -0.1429809033870697, "rewards/margins": 0.16588357090950012, "rewards/rejected": -0.3088644742965698, "step": 2924 }, { "epoch": 8.008213552361397, "grad_norm": 4.864176273345947, "learning_rate": 5.993150684931506e-07, "log_odds_chosen": 1.2021489143371582, "log_odds_ratio": -0.8472844362258911, "logits/chosen": 1.0770514011383057, "logits/rejected": 1.0944175720214844, "logps/chosen": -2.4644434452056885, "logps/rejected": -3.4938836097717285, "loss": 0.6773, "nll_loss": 0.5925719141960144, "rewards/accuracies": 0.75, "rewards/chosen": -0.24644435942173004, "rewards/margins": 0.1029440313577652, "rewards/rejected": -0.34938839077949524, "step": 2925 }, { "epoch": 8.010951403148528, "grad_norm": 3.719391107559204, "learning_rate": 5.991780821917808e-07, "log_odds_chosen": 1.9264991283416748, "log_odds_ratio": -0.28812307119369507, "logits/chosen": 1.0741209983825684, "logits/rejected": 1.0981426239013672, "logps/chosen": -2.5799479484558105, "logps/rejected": -4.4112749099731445, "loss": 0.7858, "nll_loss": 0.7569509148597717, "rewards/accuracies": 0.875, "rewards/chosen": -0.25799477100372314, "rewards/margins": 0.18313269317150116, "rewards/rejected": -0.4411274790763855, "step": 2926 }, { "epoch": 8.013689253935661, "grad_norm": 3.5588266849517822, "learning_rate": 5.99041095890411e-07, "log_odds_chosen": 2.023587226867676, "log_odds_ratio": -0.3957006335258484, "logits/chosen": 0.776131272315979, "logits/rejected": 0.749585747718811, "logps/chosen": -1.461322546005249, "logps/rejected": -3.305387496948242, "loss": 0.6007, "nll_loss": 0.5611710548400879, "rewards/accuracies": 0.875, "rewards/chosen": -0.14613224565982819, "rewards/margins": 0.18440653383731842, "rewards/rejected": -0.3305387794971466, "step": 2927 }, { "epoch": 8.016427104722792, "grad_norm": 3.535428524017334, "learning_rate": 5.98904109589041e-07, "log_odds_chosen": 2.5252456665039062, "log_odds_ratio": -0.23340380191802979, "logits/chosen": 0.8073617815971375, "logits/rejected": 0.7704499959945679, "logps/chosen": -1.9331434965133667, "logps/rejected": -4.325222492218018, "loss": 0.6274, "nll_loss": 0.6040446758270264, "rewards/accuracies": 1.0, "rewards/chosen": -0.1933143436908722, "rewards/margins": 0.2392079085111618, "rewards/rejected": -0.4325222373008728, "step": 2928 }, { "epoch": 8.019164955509925, "grad_norm": 3.6642866134643555, "learning_rate": 5.987671232876712e-07, "log_odds_chosen": 1.3954060077667236, "log_odds_ratio": -0.49118077754974365, "logits/chosen": 0.768835186958313, "logits/rejected": 0.7949849367141724, "logps/chosen": -2.3604671955108643, "logps/rejected": -3.7069156169891357, "loss": 0.7096, "nll_loss": 0.6604489684104919, "rewards/accuracies": 0.625, "rewards/chosen": -0.2360467165708542, "rewards/margins": 0.13464485108852386, "rewards/rejected": -0.37069156765937805, "step": 2929 }, { "epoch": 8.021902806297057, "grad_norm": 3.282439947128296, "learning_rate": 5.986301369863014e-07, "log_odds_chosen": 2.772308826446533, "log_odds_ratio": -0.4153847098350525, "logits/chosen": 0.7090173959732056, "logits/rejected": 0.6887654662132263, "logps/chosen": -2.3015103340148926, "logps/rejected": -4.919412612915039, "loss": 0.7418, "nll_loss": 0.700234055519104, "rewards/accuracies": 0.875, "rewards/chosen": -0.23015102744102478, "rewards/margins": 0.2617902457714081, "rewards/rejected": -0.49194127321243286, "step": 2930 }, { "epoch": 8.02464065708419, "grad_norm": 3.5185816287994385, "learning_rate": 5.984931506849315e-07, "log_odds_chosen": 3.556661367416382, "log_odds_ratio": -0.32332655787467957, "logits/chosen": 0.6349642276763916, "logits/rejected": 0.6161673069000244, "logps/chosen": -2.044722080230713, "logps/rejected": -5.427048206329346, "loss": 0.6588, "nll_loss": 0.6264678239822388, "rewards/accuracies": 0.75, "rewards/chosen": -0.20447222888469696, "rewards/margins": 0.3382326066493988, "rewards/rejected": -0.5427048206329346, "step": 2931 }, { "epoch": 8.02737850787132, "grad_norm": 3.371077299118042, "learning_rate": 5.983561643835616e-07, "log_odds_chosen": 2.484070301055908, "log_odds_ratio": -0.13179321587085724, "logits/chosen": 1.0314257144927979, "logits/rejected": 0.9971963167190552, "logps/chosen": -1.2447190284729004, "logps/rejected": -3.3501040935516357, "loss": 0.5279, "nll_loss": 0.5147227644920349, "rewards/accuracies": 1.0, "rewards/chosen": -0.12447191029787064, "rewards/margins": 0.21053846180438995, "rewards/rejected": -0.3350103795528412, "step": 2932 }, { "epoch": 8.030116358658454, "grad_norm": 3.983896493911743, "learning_rate": 5.982191780821918e-07, "log_odds_chosen": 2.5394835472106934, "log_odds_ratio": -0.25469499826431274, "logits/chosen": 0.8390763401985168, "logits/rejected": 0.8556747436523438, "logps/chosen": -2.6013851165771484, "logps/rejected": -5.032317161560059, "loss": 0.7062, "nll_loss": 0.6807548999786377, "rewards/accuracies": 0.875, "rewards/chosen": -0.26013851165771484, "rewards/margins": 0.24309316277503967, "rewards/rejected": -0.5032317042350769, "step": 2933 }, { "epoch": 8.032854209445585, "grad_norm": 3.785349130630493, "learning_rate": 5.980821917808219e-07, "log_odds_chosen": 2.0264806747436523, "log_odds_ratio": -0.44788309931755066, "logits/chosen": 0.9466123580932617, "logits/rejected": 0.9789915680885315, "logps/chosen": -2.187366247177124, "logps/rejected": -4.117444038391113, "loss": 0.6846, "nll_loss": 0.6398265361785889, "rewards/accuracies": 0.875, "rewards/chosen": -0.21873661875724792, "rewards/margins": 0.19300776720046997, "rewards/rejected": -0.4117443561553955, "step": 2934 }, { "epoch": 8.035592060232718, "grad_norm": 3.2748405933380127, "learning_rate": 5.97945205479452e-07, "log_odds_chosen": 2.39168643951416, "log_odds_ratio": -0.24376389384269714, "logits/chosen": 0.9718941450119019, "logits/rejected": 0.9270440936088562, "logps/chosen": -1.466590404510498, "logps/rejected": -3.6750082969665527, "loss": 0.5876, "nll_loss": 0.5631816387176514, "rewards/accuracies": 1.0, "rewards/chosen": -0.14665904641151428, "rewards/margins": 0.22084179520606995, "rewards/rejected": -0.36750084161758423, "step": 2935 }, { "epoch": 8.038329911019849, "grad_norm": 3.3543310165405273, "learning_rate": 5.978082191780822e-07, "log_odds_chosen": 2.7254462242126465, "log_odds_ratio": -0.27009862661361694, "logits/chosen": 0.8499492406845093, "logits/rejected": 0.8369821906089783, "logps/chosen": -1.4906809329986572, "logps/rejected": -4.062740325927734, "loss": 0.6098, "nll_loss": 0.5828264951705933, "rewards/accuracies": 1.0, "rewards/chosen": -0.14906808733940125, "rewards/margins": 0.25720590353012085, "rewards/rejected": -0.4062740206718445, "step": 2936 }, { "epoch": 8.041067761806982, "grad_norm": 3.2383036613464355, "learning_rate": 5.976712328767123e-07, "log_odds_chosen": 2.0959062576293945, "log_odds_ratio": -0.1976798176765442, "logits/chosen": 0.8644511103630066, "logits/rejected": 0.8224579691886902, "logps/chosen": -1.7103136777877808, "logps/rejected": -3.5855751037597656, "loss": 0.5754, "nll_loss": 0.5556647181510925, "rewards/accuracies": 1.0, "rewards/chosen": -0.17103135585784912, "rewards/margins": 0.18752619624137878, "rewards/rejected": -0.3585575819015503, "step": 2937 }, { "epoch": 8.043805612594113, "grad_norm": 4.738189697265625, "learning_rate": 5.975342465753425e-07, "log_odds_chosen": 2.5562663078308105, "log_odds_ratio": -0.20415642857551575, "logits/chosen": 0.9132330417633057, "logits/rejected": 0.9517383575439453, "logps/chosen": -2.224510431289673, "logps/rejected": -4.652041435241699, "loss": 0.701, "nll_loss": 0.6805366277694702, "rewards/accuracies": 1.0, "rewards/chosen": -0.22245103120803833, "rewards/margins": 0.24275313317775726, "rewards/rejected": -0.4652041494846344, "step": 2938 }, { "epoch": 8.046543463381246, "grad_norm": 3.6795623302459717, "learning_rate": 5.973972602739725e-07, "log_odds_chosen": 2.070390224456787, "log_odds_ratio": -0.2836148738861084, "logits/chosen": 0.731033205986023, "logits/rejected": 0.7058913111686707, "logps/chosen": -2.193598508834839, "logps/rejected": -4.059380531311035, "loss": 0.6272, "nll_loss": 0.5988466143608093, "rewards/accuracies": 0.875, "rewards/chosen": -0.2193598747253418, "rewards/margins": 0.1865781843662262, "rewards/rejected": -0.405938059091568, "step": 2939 }, { "epoch": 8.049281314168377, "grad_norm": 4.31735372543335, "learning_rate": 5.972602739726027e-07, "log_odds_chosen": 3.6386876106262207, "log_odds_ratio": -0.36677250266075134, "logits/chosen": 1.1178927421569824, "logits/rejected": 1.1628592014312744, "logps/chosen": -1.8063735961914062, "logps/rejected": -5.287647247314453, "loss": 0.5803, "nll_loss": 0.5436062216758728, "rewards/accuracies": 0.75, "rewards/chosen": -0.18063735961914062, "rewards/margins": 0.3481273651123047, "rewards/rejected": -0.5287647247314453, "step": 2940 }, { "epoch": 8.05201916495551, "grad_norm": 3.1282668113708496, "learning_rate": 5.971232876712329e-07, "log_odds_chosen": 2.4000959396362305, "log_odds_ratio": -0.2732149362564087, "logits/chosen": 0.7687805891036987, "logits/rejected": 0.8044393062591553, "logps/chosen": -2.4949254989624023, "logps/rejected": -4.810284614562988, "loss": 0.6101, "nll_loss": 0.5827916860580444, "rewards/accuracies": 0.875, "rewards/chosen": -0.2494925558567047, "rewards/margins": 0.2315359115600586, "rewards/rejected": -0.4810284376144409, "step": 2941 }, { "epoch": 8.054757015742641, "grad_norm": 3.52109432220459, "learning_rate": 5.969863013698629e-07, "log_odds_chosen": 1.8550689220428467, "log_odds_ratio": -0.4281748831272125, "logits/chosen": 0.602346658706665, "logits/rejected": 0.5458152890205383, "logps/chosen": -1.8051632642745972, "logps/rejected": -3.5675179958343506, "loss": 0.6524, "nll_loss": 0.6095572113990784, "rewards/accuracies": 0.75, "rewards/chosen": -0.1805163323879242, "rewards/margins": 0.17623546719551086, "rewards/rejected": -0.35675179958343506, "step": 2942 }, { "epoch": 8.057494866529774, "grad_norm": 3.8231253623962402, "learning_rate": 5.968493150684931e-07, "log_odds_chosen": 2.9428458213806152, "log_odds_ratio": -0.260528564453125, "logits/chosen": 0.637096107006073, "logits/rejected": 0.5697596073150635, "logps/chosen": -2.593142032623291, "logps/rejected": -5.451349258422852, "loss": 0.7637, "nll_loss": 0.7376466989517212, "rewards/accuracies": 1.0, "rewards/chosen": -0.2593142092227936, "rewards/margins": 0.28582069277763367, "rewards/rejected": -0.5451348423957825, "step": 2943 }, { "epoch": 8.060232717316905, "grad_norm": 3.223785877227783, "learning_rate": 5.967123287671233e-07, "log_odds_chosen": 3.3699209690093994, "log_odds_ratio": -0.22371220588684082, "logits/chosen": 0.8878535032272339, "logits/rejected": 0.907969057559967, "logps/chosen": -1.5592372417449951, "logps/rejected": -4.693349361419678, "loss": 0.7059, "nll_loss": 0.6835480332374573, "rewards/accuracies": 1.0, "rewards/chosen": -0.1559237241744995, "rewards/margins": 0.3134112060070038, "rewards/rejected": -0.4693349599838257, "step": 2944 }, { "epoch": 8.062970568104038, "grad_norm": 3.2716517448425293, "learning_rate": 5.965753424657534e-07, "log_odds_chosen": 1.9740848541259766, "log_odds_ratio": -0.2975388467311859, "logits/chosen": 0.8349729776382446, "logits/rejected": 0.8300737142562866, "logps/chosen": -1.7142784595489502, "logps/rejected": -3.533599853515625, "loss": 0.6449, "nll_loss": 0.6151041984558105, "rewards/accuracies": 0.875, "rewards/chosen": -0.17142784595489502, "rewards/margins": 0.18193212151527405, "rewards/rejected": -0.35335999727249146, "step": 2945 }, { "epoch": 8.06570841889117, "grad_norm": 3.1088390350341797, "learning_rate": 5.964383561643835e-07, "log_odds_chosen": 3.452023983001709, "log_odds_ratio": -0.2516862154006958, "logits/chosen": 0.7773873805999756, "logits/rejected": 0.7618877291679382, "logps/chosen": -2.024446964263916, "logps/rejected": -5.306058883666992, "loss": 0.6516, "nll_loss": 0.6264246106147766, "rewards/accuracies": 1.0, "rewards/chosen": -0.20244470238685608, "rewards/margins": 0.32816118001937866, "rewards/rejected": -0.5306059122085571, "step": 2946 }, { "epoch": 8.068446269678303, "grad_norm": 4.0876054763793945, "learning_rate": 5.963013698630137e-07, "log_odds_chosen": 1.9840049743652344, "log_odds_ratio": -0.2836324870586395, "logits/chosen": 0.8598251342773438, "logits/rejected": 0.8723181486129761, "logps/chosen": -1.7232547998428345, "logps/rejected": -3.487015724182129, "loss": 0.6279, "nll_loss": 0.5995829701423645, "rewards/accuracies": 0.875, "rewards/chosen": -0.1723254919052124, "rewards/margins": 0.1763761043548584, "rewards/rejected": -0.3487015962600708, "step": 2947 }, { "epoch": 8.071184120465434, "grad_norm": 3.536522150039673, "learning_rate": 5.961643835616438e-07, "log_odds_chosen": 1.7443480491638184, "log_odds_ratio": -0.33688193559646606, "logits/chosen": 0.7233543992042542, "logits/rejected": 0.6903223395347595, "logps/chosen": -2.221102476119995, "logps/rejected": -3.8731613159179688, "loss": 0.7742, "nll_loss": 0.7404767274856567, "rewards/accuracies": 0.875, "rewards/chosen": -0.22211024165153503, "rewards/margins": 0.1652059257030487, "rewards/rejected": -0.38731613755226135, "step": 2948 }, { "epoch": 8.073921971252567, "grad_norm": 3.605126142501831, "learning_rate": 5.960273972602739e-07, "log_odds_chosen": 2.099867582321167, "log_odds_ratio": -0.3367063105106354, "logits/chosen": 0.7703478932380676, "logits/rejected": 0.8190174698829651, "logps/chosen": -2.1771206855773926, "logps/rejected": -4.134706497192383, "loss": 0.6888, "nll_loss": 0.6551133394241333, "rewards/accuracies": 0.875, "rewards/chosen": -0.21771207451820374, "rewards/margins": 0.1957586109638214, "rewards/rejected": -0.41347068548202515, "step": 2949 }, { "epoch": 8.0766598220397, "grad_norm": 3.175048828125, "learning_rate": 5.958904109589041e-07, "log_odds_chosen": 2.7090702056884766, "log_odds_ratio": -0.14151431620121002, "logits/chosen": 0.8763142824172974, "logits/rejected": 0.8391931056976318, "logps/chosen": -1.6746206283569336, "logps/rejected": -4.124226093292236, "loss": 0.6278, "nll_loss": 0.6136609315872192, "rewards/accuracies": 1.0, "rewards/chosen": -0.1674620807170868, "rewards/margins": 0.24496054649353027, "rewards/rejected": -0.41242265701293945, "step": 2950 }, { "epoch": 8.07939767282683, "grad_norm": 4.36289644241333, "learning_rate": 5.957534246575342e-07, "log_odds_chosen": 3.289578437805176, "log_odds_ratio": -0.4427993595600128, "logits/chosen": 0.7473390698432922, "logits/rejected": 0.7557321786880493, "logps/chosen": -2.412662982940674, "logps/rejected": -5.575606346130371, "loss": 0.667, "nll_loss": 0.6227161884307861, "rewards/accuracies": 0.875, "rewards/chosen": -0.24126631021499634, "rewards/margins": 0.3162943720817566, "rewards/rejected": -0.5575606822967529, "step": 2951 }, { "epoch": 8.082135523613964, "grad_norm": 3.6114158630371094, "learning_rate": 5.956164383561644e-07, "log_odds_chosen": 2.7631356716156006, "log_odds_ratio": -0.2414540946483612, "logits/chosen": 0.9537205100059509, "logits/rejected": 1.017975091934204, "logps/chosen": -2.9503259658813477, "logps/rejected": -5.614503860473633, "loss": 0.7461, "nll_loss": 0.7219663262367249, "rewards/accuracies": 1.0, "rewards/chosen": -0.2950326204299927, "rewards/margins": 0.26641780138015747, "rewards/rejected": -0.5614504218101501, "step": 2952 }, { "epoch": 8.084873374401095, "grad_norm": 5.750400066375732, "learning_rate": 5.954794520547944e-07, "log_odds_chosen": 2.6321821212768555, "log_odds_ratio": -0.28746598958969116, "logits/chosen": 0.822138786315918, "logits/rejected": 0.8137363195419312, "logps/chosen": -2.2551841735839844, "logps/rejected": -4.72437858581543, "loss": 0.8115, "nll_loss": 0.7827702760696411, "rewards/accuracies": 0.875, "rewards/chosen": -0.22551845014095306, "rewards/margins": 0.2469194233417511, "rewards/rejected": -0.47243788838386536, "step": 2953 }, { "epoch": 8.087611225188228, "grad_norm": 9.599128723144531, "learning_rate": 5.953424657534246e-07, "log_odds_chosen": 2.57484769821167, "log_odds_ratio": -0.34712427854537964, "logits/chosen": 0.8427263498306274, "logits/rejected": 0.8610514402389526, "logps/chosen": -3.2792062759399414, "logps/rejected": -5.673457145690918, "loss": 0.742, "nll_loss": 0.7073166966438293, "rewards/accuracies": 0.75, "rewards/chosen": -0.32792067527770996, "rewards/margins": 0.23942504823207855, "rewards/rejected": -0.5673457384109497, "step": 2954 }, { "epoch": 8.09034907597536, "grad_norm": 4.385334491729736, "learning_rate": 5.952054794520548e-07, "log_odds_chosen": 1.1367162466049194, "log_odds_ratio": -0.3733460009098053, "logits/chosen": 0.7237949967384338, "logits/rejected": 0.6921364068984985, "logps/chosen": -2.397186756134033, "logps/rejected": -3.4008994102478027, "loss": 0.6931, "nll_loss": 0.655734658241272, "rewards/accuracies": 0.75, "rewards/chosen": -0.23971867561340332, "rewards/margins": 0.10037128627300262, "rewards/rejected": -0.34008994698524475, "step": 2955 }, { "epoch": 8.093086926762492, "grad_norm": 3.7364559173583984, "learning_rate": 5.950684931506848e-07, "log_odds_chosen": 2.124861717224121, "log_odds_ratio": -0.39229080080986023, "logits/chosen": 0.6361621618270874, "logits/rejected": 0.6309072971343994, "logps/chosen": -2.766113758087158, "logps/rejected": -4.8404388427734375, "loss": 0.7108, "nll_loss": 0.6715919375419617, "rewards/accuracies": 0.75, "rewards/chosen": -0.2766113579273224, "rewards/margins": 0.20743250846862793, "rewards/rejected": -0.4840438961982727, "step": 2956 }, { "epoch": 8.095824777549623, "grad_norm": 3.859149694442749, "learning_rate": 5.94931506849315e-07, "log_odds_chosen": 0.4826014041900635, "log_odds_ratio": -0.5502682328224182, "logits/chosen": 0.6946164965629578, "logits/rejected": 0.6460762023925781, "logps/chosen": -1.6750469207763672, "logps/rejected": -2.1194851398468018, "loss": 0.6667, "nll_loss": 0.6116634011268616, "rewards/accuracies": 0.75, "rewards/chosen": -0.1675046980381012, "rewards/margins": 0.04444381594657898, "rewards/rejected": -0.21194851398468018, "step": 2957 }, { "epoch": 8.098562628336756, "grad_norm": 3.7757656574249268, "learning_rate": 5.947945205479452e-07, "log_odds_chosen": 1.096858024597168, "log_odds_ratio": -0.36996138095855713, "logits/chosen": 0.7857972383499146, "logits/rejected": 0.7318277359008789, "logps/chosen": -1.3570150136947632, "logps/rejected": -2.29720401763916, "loss": 0.6231, "nll_loss": 0.5861080884933472, "rewards/accuracies": 1.0, "rewards/chosen": -0.1357014924287796, "rewards/margins": 0.09401892125606537, "rewards/rejected": -0.22972041368484497, "step": 2958 }, { "epoch": 8.101300479123887, "grad_norm": 3.9291954040527344, "learning_rate": 5.946575342465753e-07, "log_odds_chosen": 1.5157747268676758, "log_odds_ratio": -0.43058860301971436, "logits/chosen": 1.0712852478027344, "logits/rejected": 1.0254747867584229, "logps/chosen": -2.1201467514038086, "logps/rejected": -3.538485527038574, "loss": 0.6864, "nll_loss": 0.6432920098304749, "rewards/accuracies": 0.875, "rewards/chosen": -0.21201467514038086, "rewards/margins": 0.14183387160301208, "rewards/rejected": -0.35384851694107056, "step": 2959 }, { "epoch": 8.10403832991102, "grad_norm": 3.1767477989196777, "learning_rate": 5.945205479452054e-07, "log_odds_chosen": 1.6732025146484375, "log_odds_ratio": -0.2460787296295166, "logits/chosen": 0.7685654163360596, "logits/rejected": 0.7264596819877625, "logps/chosen": -1.760028600692749, "logps/rejected": -3.274379014968872, "loss": 0.5961, "nll_loss": 0.5715129971504211, "rewards/accuracies": 1.0, "rewards/chosen": -0.1760028600692749, "rewards/margins": 0.1514350175857544, "rewards/rejected": -0.3274378776550293, "step": 2960 }, { "epoch": 8.106776180698152, "grad_norm": 5.463622570037842, "learning_rate": 5.943835616438356e-07, "log_odds_chosen": 2.9421443939208984, "log_odds_ratio": -0.2414979338645935, "logits/chosen": 0.8331335186958313, "logits/rejected": 0.8130025267601013, "logps/chosen": -2.0219309329986572, "logps/rejected": -4.763632774353027, "loss": 0.6801, "nll_loss": 0.6559810638427734, "rewards/accuracies": 0.875, "rewards/chosen": -0.20219309628009796, "rewards/margins": 0.2741702198982239, "rewards/rejected": -0.47636333107948303, "step": 2961 }, { "epoch": 8.109514031485284, "grad_norm": 4.376410961151123, "learning_rate": 5.942465753424657e-07, "log_odds_chosen": 2.3576574325561523, "log_odds_ratio": -0.1582355499267578, "logits/chosen": 0.9675130844116211, "logits/rejected": 1.0178292989730835, "logps/chosen": -1.8422815799713135, "logps/rejected": -3.941020965576172, "loss": 0.6181, "nll_loss": 0.6023046970367432, "rewards/accuracies": 1.0, "rewards/chosen": -0.18422815203666687, "rewards/margins": 0.20987394452095032, "rewards/rejected": -0.3941020965576172, "step": 2962 }, { "epoch": 8.112251882272416, "grad_norm": 3.348222494125366, "learning_rate": 5.941095890410958e-07, "log_odds_chosen": 1.6802932024002075, "log_odds_ratio": -0.4157242774963379, "logits/chosen": 0.9251129627227783, "logits/rejected": 0.9776046276092529, "logps/chosen": -1.999760627746582, "logps/rejected": -3.5968899726867676, "loss": 0.6977, "nll_loss": 0.6561270952224731, "rewards/accuracies": 0.75, "rewards/chosen": -0.19997605681419373, "rewards/margins": 0.15971291065216064, "rewards/rejected": -0.35968899726867676, "step": 2963 }, { "epoch": 8.114989733059549, "grad_norm": 3.538619041442871, "learning_rate": 5.93972602739726e-07, "log_odds_chosen": 2.095669746398926, "log_odds_ratio": -0.3878847062587738, "logits/chosen": 1.1875698566436768, "logits/rejected": 1.185349464416504, "logps/chosen": -1.928682565689087, "logps/rejected": -3.8636975288391113, "loss": 0.7113, "nll_loss": 0.672469437122345, "rewards/accuracies": 0.875, "rewards/chosen": -0.19286826252937317, "rewards/margins": 0.1935015171766281, "rewards/rejected": -0.3863697648048401, "step": 2964 }, { "epoch": 8.11772758384668, "grad_norm": 6.4286112785339355, "learning_rate": 5.938356164383561e-07, "log_odds_chosen": 1.8283710479736328, "log_odds_ratio": -0.30432605743408203, "logits/chosen": 1.0835562944412231, "logits/rejected": 1.050276517868042, "logps/chosen": -2.66752290725708, "logps/rejected": -4.382574558258057, "loss": 0.7165, "nll_loss": 0.6860760450363159, "rewards/accuracies": 0.75, "rewards/chosen": -0.26675230264663696, "rewards/margins": 0.1715051829814911, "rewards/rejected": -0.43825748562812805, "step": 2965 }, { "epoch": 8.120465434633813, "grad_norm": 9.35216236114502, "learning_rate": 5.936986301369863e-07, "log_odds_chosen": 1.4799543619155884, "log_odds_ratio": -0.6582931280136108, "logits/chosen": 0.9045408964157104, "logits/rejected": 0.8730781078338623, "logps/chosen": -2.685412883758545, "logps/rejected": -4.014822959899902, "loss": 0.6856, "nll_loss": 0.6197534799575806, "rewards/accuracies": 0.875, "rewards/chosen": -0.2685413062572479, "rewards/margins": 0.13294100761413574, "rewards/rejected": -0.40148234367370605, "step": 2966 }, { "epoch": 8.123203285420944, "grad_norm": 3.485689878463745, "learning_rate": 5.935616438356164e-07, "log_odds_chosen": 1.9044848680496216, "log_odds_ratio": -0.30488333106040955, "logits/chosen": 0.5013999938964844, "logits/rejected": 0.4321150779724121, "logps/chosen": -1.5643843412399292, "logps/rejected": -3.3109328746795654, "loss": 0.7102, "nll_loss": 0.6796894073486328, "rewards/accuracies": 1.0, "rewards/chosen": -0.1564384400844574, "rewards/margins": 0.17465485632419586, "rewards/rejected": -0.33109328150749207, "step": 2967 }, { "epoch": 8.125941136208077, "grad_norm": 3.0362932682037354, "learning_rate": 5.934246575342465e-07, "log_odds_chosen": 3.9282336235046387, "log_odds_ratio": -0.10792220383882523, "logits/chosen": 1.1005247831344604, "logits/rejected": 1.069710612297058, "logps/chosen": -1.7719242572784424, "logps/rejected": -5.478582382202148, "loss": 0.6114, "nll_loss": 0.6006461381912231, "rewards/accuracies": 1.0, "rewards/chosen": -0.17719241976737976, "rewards/margins": 0.3706657886505127, "rewards/rejected": -0.5478582382202148, "step": 2968 }, { "epoch": 8.128678986995208, "grad_norm": 4.3324127197265625, "learning_rate": 5.932876712328767e-07, "log_odds_chosen": 1.574952244758606, "log_odds_ratio": -0.41958674788475037, "logits/chosen": 0.9170136451721191, "logits/rejected": 0.9064618349075317, "logps/chosen": -2.2088212966918945, "logps/rejected": -3.689136028289795, "loss": 0.6962, "nll_loss": 0.6542000770568848, "rewards/accuracies": 0.875, "rewards/chosen": -0.22088214755058289, "rewards/margins": 0.14803147315979004, "rewards/rejected": -0.36891359090805054, "step": 2969 }, { "epoch": 8.131416837782341, "grad_norm": 3.461902379989624, "learning_rate": 5.931506849315067e-07, "log_odds_chosen": 1.8215142488479614, "log_odds_ratio": -0.4774587154388428, "logits/chosen": 0.8376345634460449, "logits/rejected": 0.8830243349075317, "logps/chosen": -1.981313943862915, "logps/rejected": -3.708996295928955, "loss": 0.718, "nll_loss": 0.6702920198440552, "rewards/accuracies": 0.875, "rewards/chosen": -0.19813139736652374, "rewards/margins": 0.1727682203054428, "rewards/rejected": -0.37089961767196655, "step": 2970 }, { "epoch": 8.134154688569472, "grad_norm": 3.210231065750122, "learning_rate": 5.930136986301369e-07, "log_odds_chosen": 1.5509103536605835, "log_odds_ratio": -0.328483521938324, "logits/chosen": 0.6812334656715393, "logits/rejected": 0.660910427570343, "logps/chosen": -2.094945192337036, "logps/rejected": -3.521902561187744, "loss": 0.6259, "nll_loss": 0.5930947065353394, "rewards/accuracies": 0.875, "rewards/chosen": -0.20949451625347137, "rewards/margins": 0.14269578456878662, "rewards/rejected": -0.3521903157234192, "step": 2971 }, { "epoch": 8.136892539356605, "grad_norm": 4.102993011474609, "learning_rate": 5.928767123287671e-07, "log_odds_chosen": 2.326411247253418, "log_odds_ratio": -0.2845665216445923, "logits/chosen": 0.8583927154541016, "logits/rejected": 0.8641229867935181, "logps/chosen": -2.235093116760254, "logps/rejected": -4.354791641235352, "loss": 0.7108, "nll_loss": 0.6823869347572327, "rewards/accuracies": 0.875, "rewards/chosen": -0.2235092967748642, "rewards/margins": 0.21196985244750977, "rewards/rejected": -0.43547916412353516, "step": 2972 }, { "epoch": 8.139630390143736, "grad_norm": 6.359389781951904, "learning_rate": 5.927397260273972e-07, "log_odds_chosen": 2.421060562133789, "log_odds_ratio": -0.5564091205596924, "logits/chosen": 0.6928591728210449, "logits/rejected": 0.6692160964012146, "logps/chosen": -3.323558807373047, "logps/rejected": -5.681326389312744, "loss": 0.7, "nll_loss": 0.6443458795547485, "rewards/accuracies": 0.75, "rewards/chosen": -0.33235591650009155, "rewards/margins": 0.23577672243118286, "rewards/rejected": -0.5681326389312744, "step": 2973 }, { "epoch": 8.14236824093087, "grad_norm": 3.9055776596069336, "learning_rate": 5.926027397260273e-07, "log_odds_chosen": 1.7045499086380005, "log_odds_ratio": -0.29325127601623535, "logits/chosen": 0.8037256598472595, "logits/rejected": 0.786657452583313, "logps/chosen": -1.4228216409683228, "logps/rejected": -2.9225590229034424, "loss": 0.5688, "nll_loss": 0.5394788980484009, "rewards/accuracies": 1.0, "rewards/chosen": -0.142282173037529, "rewards/margins": 0.14997373521327972, "rewards/rejected": -0.2922559082508087, "step": 2974 }, { "epoch": 8.145106091718002, "grad_norm": 3.240314483642578, "learning_rate": 5.924657534246575e-07, "log_odds_chosen": 1.764787197113037, "log_odds_ratio": -0.308061808347702, "logits/chosen": 0.9236463308334351, "logits/rejected": 0.9504600167274475, "logps/chosen": -2.2843480110168457, "logps/rejected": -3.9725160598754883, "loss": 0.6962, "nll_loss": 0.6653748750686646, "rewards/accuracies": 1.0, "rewards/chosen": -0.22843480110168457, "rewards/margins": 0.16881680488586426, "rewards/rejected": -0.39725160598754883, "step": 2975 }, { "epoch": 8.147843942505133, "grad_norm": 4.952475547790527, "learning_rate": 5.923287671232876e-07, "log_odds_chosen": 2.1635727882385254, "log_odds_ratio": -0.22964294254779816, "logits/chosen": 0.966373085975647, "logits/rejected": 1.001277208328247, "logps/chosen": -2.579923629760742, "logps/rejected": -4.609657287597656, "loss": 0.6346, "nll_loss": 0.6116147637367249, "rewards/accuracies": 1.0, "rewards/chosen": -0.25799238681793213, "rewards/margins": 0.2029733806848526, "rewards/rejected": -0.46096575260162354, "step": 2976 }, { "epoch": 8.150581793292266, "grad_norm": 3.3639883995056152, "learning_rate": 5.921917808219177e-07, "log_odds_chosen": 1.0701024532318115, "log_odds_ratio": -0.3274865746498108, "logits/chosen": 1.0517445802688599, "logits/rejected": 0.9837912321090698, "logps/chosen": -1.6921422481536865, "logps/rejected": -2.5719926357269287, "loss": 0.5843, "nll_loss": 0.5515735745429993, "rewards/accuracies": 1.0, "rewards/chosen": -0.16921423375606537, "rewards/margins": 0.08798505365848541, "rewards/rejected": -0.2571992874145508, "step": 2977 }, { "epoch": 8.153319644079398, "grad_norm": 6.8486504554748535, "learning_rate": 5.920547945205479e-07, "log_odds_chosen": 2.185673952102661, "log_odds_ratio": -0.5802853107452393, "logits/chosen": 1.0548745393753052, "logits/rejected": 1.0037933588027954, "logps/chosen": -2.9301767349243164, "logps/rejected": -5.084559440612793, "loss": 0.8047, "nll_loss": 0.7467171549797058, "rewards/accuracies": 0.875, "rewards/chosen": -0.2930176854133606, "rewards/margins": 0.21543821692466736, "rewards/rejected": -0.5084558725357056, "step": 2978 }, { "epoch": 8.15605749486653, "grad_norm": 4.960999011993408, "learning_rate": 5.91917808219178e-07, "log_odds_chosen": 1.6720635890960693, "log_odds_ratio": -0.5314061641693115, "logits/chosen": 1.1401898860931396, "logits/rejected": 1.1382744312286377, "logps/chosen": -2.7182750701904297, "logps/rejected": -4.336705207824707, "loss": 0.7104, "nll_loss": 0.6572782397270203, "rewards/accuracies": 0.75, "rewards/chosen": -0.2718275189399719, "rewards/margins": 0.16184306144714355, "rewards/rejected": -0.4336705803871155, "step": 2979 }, { "epoch": 8.158795345653662, "grad_norm": 9.466907501220703, "learning_rate": 5.917808219178083e-07, "log_odds_chosen": 2.0895845890045166, "log_odds_ratio": -0.355818510055542, "logits/chosen": 1.0641931295394897, "logits/rejected": 0.9976856708526611, "logps/chosen": -2.830904960632324, "logps/rejected": -4.825561046600342, "loss": 0.8685, "nll_loss": 0.8328908681869507, "rewards/accuracies": 0.875, "rewards/chosen": -0.2830905020236969, "rewards/margins": 0.19946563243865967, "rewards/rejected": -0.4825561046600342, "step": 2980 }, { "epoch": 8.161533196440795, "grad_norm": 5.839372634887695, "learning_rate": 5.916438356164383e-07, "log_odds_chosen": 1.2194770574569702, "log_odds_ratio": -0.5050547122955322, "logits/chosen": 0.9521172046661377, "logits/rejected": 0.949876070022583, "logps/chosen": -2.384345054626465, "logps/rejected": -3.484682083129883, "loss": 0.7026, "nll_loss": 0.6521109938621521, "rewards/accuracies": 0.75, "rewards/chosen": -0.23843452334403992, "rewards/margins": 0.11003371328115463, "rewards/rejected": -0.34846824407577515, "step": 2981 }, { "epoch": 8.164271047227926, "grad_norm": 5.862699031829834, "learning_rate": 5.915068493150684e-07, "log_odds_chosen": 1.423072338104248, "log_odds_ratio": -0.38245946168899536, "logits/chosen": 0.7562023997306824, "logits/rejected": 0.7279322147369385, "logps/chosen": -2.235130548477173, "logps/rejected": -3.558927297592163, "loss": 0.7294, "nll_loss": 0.6911110281944275, "rewards/accuracies": 0.875, "rewards/chosen": -0.22351306676864624, "rewards/margins": 0.1323796808719635, "rewards/rejected": -0.35589271783828735, "step": 2982 }, { "epoch": 8.167008898015059, "grad_norm": 3.637211561203003, "learning_rate": 5.913698630136987e-07, "log_odds_chosen": 1.8664101362228394, "log_odds_ratio": -0.28514915704727173, "logits/chosen": 1.0334737300872803, "logits/rejected": 1.070947527885437, "logps/chosen": -2.141427516937256, "logps/rejected": -3.9406981468200684, "loss": 0.7094, "nll_loss": 0.6808789968490601, "rewards/accuracies": 0.875, "rewards/chosen": -0.21414276957511902, "rewards/margins": 0.1799270510673523, "rewards/rejected": -0.3940698206424713, "step": 2983 }, { "epoch": 8.16974674880219, "grad_norm": 3.5338096618652344, "learning_rate": 5.912328767123286e-07, "log_odds_chosen": 2.680412769317627, "log_odds_ratio": -0.30459195375442505, "logits/chosen": 1.0625542402267456, "logits/rejected": 1.1078100204467773, "logps/chosen": -2.4198288917541504, "logps/rejected": -4.999156951904297, "loss": 0.6806, "nll_loss": 0.650188684463501, "rewards/accuracies": 0.75, "rewards/chosen": -0.24198287725448608, "rewards/margins": 0.2579328417778015, "rewards/rejected": -0.49991574883461, "step": 2984 }, { "epoch": 8.172484599589323, "grad_norm": 3.114471197128296, "learning_rate": 5.910958904109589e-07, "log_odds_chosen": 1.2282524108886719, "log_odds_ratio": -0.4098995327949524, "logits/chosen": 0.8247063159942627, "logits/rejected": 0.7813388705253601, "logps/chosen": -2.0460948944091797, "logps/rejected": -3.1305108070373535, "loss": 0.6409, "nll_loss": 0.5998752117156982, "rewards/accuracies": 0.875, "rewards/chosen": -0.2046094834804535, "rewards/margins": 0.10844159871339798, "rewards/rejected": -0.3130510747432709, "step": 2985 }, { "epoch": 8.175222450376454, "grad_norm": 3.820242404937744, "learning_rate": 5.909589041095891e-07, "log_odds_chosen": 1.8523672819137573, "log_odds_ratio": -0.2971302568912506, "logits/chosen": 0.6919794082641602, "logits/rejected": 0.6799887418746948, "logps/chosen": -1.556750774383545, "logps/rejected": -3.2091219425201416, "loss": 0.7497, "nll_loss": 0.7199389934539795, "rewards/accuracies": 0.875, "rewards/chosen": -0.15567508339881897, "rewards/margins": 0.16523712873458862, "rewards/rejected": -0.3209122121334076, "step": 2986 }, { "epoch": 8.177960301163587, "grad_norm": 3.6614573001861572, "learning_rate": 5.908219178082192e-07, "log_odds_chosen": 2.045382499694824, "log_odds_ratio": -0.23638984560966492, "logits/chosen": 0.6178778409957886, "logits/rejected": 0.5156957507133484, "logps/chosen": -1.8123445510864258, "logps/rejected": -3.6784987449645996, "loss": 0.6341, "nll_loss": 0.6104583740234375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1812344789505005, "rewards/margins": 0.18661540746688843, "rewards/rejected": -0.3678498864173889, "step": 2987 }, { "epoch": 8.180698151950718, "grad_norm": 3.4299850463867188, "learning_rate": 5.906849315068493e-07, "log_odds_chosen": 2.1401660442352295, "log_odds_ratio": -0.28165027499198914, "logits/chosen": 0.7560629844665527, "logits/rejected": 0.7439459562301636, "logps/chosen": -1.9630274772644043, "logps/rejected": -3.978825807571411, "loss": 0.6691, "nll_loss": 0.6409043669700623, "rewards/accuracies": 0.875, "rewards/chosen": -0.19630274176597595, "rewards/margins": 0.20157986879348755, "rewards/rejected": -0.3978826105594635, "step": 2988 }, { "epoch": 8.183436002737851, "grad_norm": 3.5250229835510254, "learning_rate": 5.905479452054795e-07, "log_odds_chosen": 1.8018804788589478, "log_odds_ratio": -0.2563294768333435, "logits/chosen": 0.8947571516036987, "logits/rejected": 0.9231007695198059, "logps/chosen": -1.875787377357483, "logps/rejected": -3.533310890197754, "loss": 0.6054, "nll_loss": 0.5797913670539856, "rewards/accuracies": 1.0, "rewards/chosen": -0.1875787377357483, "rewards/margins": 0.1657523363828659, "rewards/rejected": -0.3533310890197754, "step": 2989 }, { "epoch": 8.186173853524982, "grad_norm": 6.157295227050781, "learning_rate": 5.904109589041096e-07, "log_odds_chosen": 1.9068940877914429, "log_odds_ratio": -0.7051311731338501, "logits/chosen": 0.7448055744171143, "logits/rejected": 0.7192124724388123, "logps/chosen": -2.764554023742676, "logps/rejected": -4.589179039001465, "loss": 0.745, "nll_loss": 0.6744741797447205, "rewards/accuracies": 0.75, "rewards/chosen": -0.2764554023742676, "rewards/margins": 0.18246248364448547, "rewards/rejected": -0.45891791582107544, "step": 2990 }, { "epoch": 8.188911704312115, "grad_norm": 4.14328670501709, "learning_rate": 5.902739726027397e-07, "log_odds_chosen": 1.863284945487976, "log_odds_ratio": -0.3109820783138275, "logits/chosen": 0.7692285776138306, "logits/rejected": 0.6474913358688354, "logps/chosen": -1.9463059902191162, "logps/rejected": -3.6657891273498535, "loss": 0.7476, "nll_loss": 0.716496467590332, "rewards/accuracies": 0.875, "rewards/chosen": -0.19463059306144714, "rewards/margins": 0.17194834351539612, "rewards/rejected": -0.3665789067745209, "step": 2991 }, { "epoch": 8.191649555099247, "grad_norm": 3.397033452987671, "learning_rate": 5.901369863013699e-07, "log_odds_chosen": 3.1249752044677734, "log_odds_ratio": -0.16313518583774567, "logits/chosen": 0.7150420546531677, "logits/rejected": 0.7505372762680054, "logps/chosen": -2.0303783416748047, "logps/rejected": -4.98703670501709, "loss": 0.7377, "nll_loss": 0.7213588953018188, "rewards/accuracies": 1.0, "rewards/chosen": -0.203037828207016, "rewards/margins": 0.29566583037376404, "rewards/rejected": -0.49870365858078003, "step": 2992 }, { "epoch": 8.19438740588638, "grad_norm": 3.379305124282837, "learning_rate": 5.9e-07, "log_odds_chosen": 2.2586987018585205, "log_odds_ratio": -0.2237536460161209, "logits/chosen": 0.9324010014533997, "logits/rejected": 0.9831541776657104, "logps/chosen": -2.103884696960449, "logps/rejected": -4.160552024841309, "loss": 0.7029, "nll_loss": 0.6805458068847656, "rewards/accuracies": 1.0, "rewards/chosen": -0.2103884518146515, "rewards/margins": 0.20566678047180176, "rewards/rejected": -0.41605526208877563, "step": 2993 }, { "epoch": 8.19712525667351, "grad_norm": 3.3214778900146484, "learning_rate": 5.898630136986302e-07, "log_odds_chosen": 2.8970584869384766, "log_odds_ratio": -0.23616895079612732, "logits/chosen": 0.6810507774353027, "logits/rejected": 0.6569207310676575, "logps/chosen": -1.6898332834243774, "logps/rejected": -4.378026008605957, "loss": 0.6344, "nll_loss": 0.6107891798019409, "rewards/accuracies": 1.0, "rewards/chosen": -0.1689833253622055, "rewards/margins": 0.26881927251815796, "rewards/rejected": -0.43780261278152466, "step": 2994 }, { "epoch": 8.199863107460644, "grad_norm": 5.176691055297852, "learning_rate": 5.897260273972603e-07, "log_odds_chosen": 1.1753275394439697, "log_odds_ratio": -0.4361104369163513, "logits/chosen": 0.7353168725967407, "logits/rejected": 0.7244110703468323, "logps/chosen": -2.3937320709228516, "logps/rejected": -3.4567532539367676, "loss": 0.6492, "nll_loss": 0.6055516004562378, "rewards/accuracies": 0.875, "rewards/chosen": -0.23937320709228516, "rewards/margins": 0.10630212724208832, "rewards/rejected": -0.34567534923553467, "step": 2995 }, { "epoch": 8.202600958247775, "grad_norm": 3.366170644760132, "learning_rate": 5.895890410958904e-07, "log_odds_chosen": 1.3804571628570557, "log_odds_ratio": -0.28236424922943115, "logits/chosen": 0.7645020484924316, "logits/rejected": 0.675673246383667, "logps/chosen": -1.5911672115325928, "logps/rejected": -2.830841541290283, "loss": 0.6353, "nll_loss": 0.6070200204849243, "rewards/accuracies": 1.0, "rewards/chosen": -0.159116730093956, "rewards/margins": 0.12396744638681412, "rewards/rejected": -0.2830841541290283, "step": 2996 }, { "epoch": 8.205338809034908, "grad_norm": 7.117343425750732, "learning_rate": 5.894520547945206e-07, "log_odds_chosen": 1.0456715822219849, "log_odds_ratio": -0.49428945779800415, "logits/chosen": 0.6146078109741211, "logits/rejected": 0.525712251663208, "logps/chosen": -2.457821846008301, "logps/rejected": -3.419673442840576, "loss": 0.8297, "nll_loss": 0.7802330851554871, "rewards/accuracies": 0.875, "rewards/chosen": -0.24578219652175903, "rewards/margins": 0.09618513286113739, "rewards/rejected": -0.3419673442840576, "step": 2997 }, { "epoch": 8.208076659822039, "grad_norm": 6.0411505699157715, "learning_rate": 5.893150684931507e-07, "log_odds_chosen": 1.1232608556747437, "log_odds_ratio": -0.5949830412864685, "logits/chosen": 0.6138632297515869, "logits/rejected": 0.5623586177825928, "logps/chosen": -2.6967525482177734, "logps/rejected": -3.743110418319702, "loss": 0.6935, "nll_loss": 0.6339747905731201, "rewards/accuracies": 0.75, "rewards/chosen": -0.26967525482177734, "rewards/margins": 0.10463576018810272, "rewards/rejected": -0.37431102991104126, "step": 2998 }, { "epoch": 8.210814510609172, "grad_norm": 3.3654677867889404, "learning_rate": 5.891780821917808e-07, "log_odds_chosen": 2.2875664234161377, "log_odds_ratio": -0.24055717885494232, "logits/chosen": 0.7189005613327026, "logits/rejected": 0.7227686643600464, "logps/chosen": -2.139603614807129, "logps/rejected": -4.327233791351318, "loss": 0.6813, "nll_loss": 0.6572911739349365, "rewards/accuracies": 1.0, "rewards/chosen": -0.21396034955978394, "rewards/margins": 0.21876302361488342, "rewards/rejected": -0.43272340297698975, "step": 2999 }, { "epoch": 8.213552361396303, "grad_norm": 3.069143533706665, "learning_rate": 5.89041095890411e-07, "log_odds_chosen": 2.402583122253418, "log_odds_ratio": -0.16830773651599884, "logits/chosen": 1.0144414901733398, "logits/rejected": 1.0171618461608887, "logps/chosen": -1.7739416360855103, "logps/rejected": -3.9084486961364746, "loss": 0.5912, "nll_loss": 0.5743444561958313, "rewards/accuracies": 1.0, "rewards/chosen": -0.17739415168762207, "rewards/margins": 0.21345074474811554, "rewards/rejected": -0.3908448815345764, "step": 3000 }, { "epoch": 8.216290212183436, "grad_norm": 3.321791172027588, "learning_rate": 5.88904109589041e-07, "log_odds_chosen": 2.760915756225586, "log_odds_ratio": -0.19147740304470062, "logits/chosen": 1.009350299835205, "logits/rejected": 1.0549867153167725, "logps/chosen": -2.21797251701355, "logps/rejected": -4.881753921508789, "loss": 0.5554, "nll_loss": 0.5362477898597717, "rewards/accuracies": 0.875, "rewards/chosen": -0.22179725766181946, "rewards/margins": 0.26637810468673706, "rewards/rejected": -0.4881753921508789, "step": 3001 }, { "epoch": 8.219028062970569, "grad_norm": 3.7032201290130615, "learning_rate": 5.887671232876712e-07, "log_odds_chosen": 1.4438050985336304, "log_odds_ratio": -0.3288559913635254, "logits/chosen": 0.7122484445571899, "logits/rejected": 0.7116626501083374, "logps/chosen": -2.516861915588379, "logps/rejected": -3.8511152267456055, "loss": 0.851, "nll_loss": 0.8181551098823547, "rewards/accuracies": 0.875, "rewards/chosen": -0.2516861855983734, "rewards/margins": 0.13342534005641937, "rewards/rejected": -0.385111540555954, "step": 3002 }, { "epoch": 8.2217659137577, "grad_norm": 5.1369309425354, "learning_rate": 5.886301369863014e-07, "log_odds_chosen": 0.9304801821708679, "log_odds_ratio": -0.7771849036216736, "logits/chosen": 1.091998815536499, "logits/rejected": 1.1607555150985718, "logps/chosen": -2.716052532196045, "logps/rejected": -3.5894176959991455, "loss": 0.7202, "nll_loss": 0.6424410343170166, "rewards/accuracies": 0.625, "rewards/chosen": -0.2716052830219269, "rewards/margins": 0.08733649551868439, "rewards/rejected": -0.3589417636394501, "step": 3003 }, { "epoch": 8.224503764544833, "grad_norm": 3.4233922958374023, "learning_rate": 5.884931506849315e-07, "log_odds_chosen": 1.9797395467758179, "log_odds_ratio": -0.25373461842536926, "logits/chosen": 0.9119160175323486, "logits/rejected": 0.9301247596740723, "logps/chosen": -2.073978900909424, "logps/rejected": -3.8818774223327637, "loss": 0.5995, "nll_loss": 0.5741558074951172, "rewards/accuracies": 0.875, "rewards/chosen": -0.20739790797233582, "rewards/margins": 0.18078985810279846, "rewards/rejected": -0.3881877660751343, "step": 3004 }, { "epoch": 8.227241615331964, "grad_norm": 4.477994441986084, "learning_rate": 5.883561643835616e-07, "log_odds_chosen": 1.283430576324463, "log_odds_ratio": -0.5150773525238037, "logits/chosen": 0.9977059364318848, "logits/rejected": 0.9585280418395996, "logps/chosen": -1.8802767992019653, "logps/rejected": -2.989234209060669, "loss": 0.6496, "nll_loss": 0.5980851650238037, "rewards/accuracies": 0.875, "rewards/chosen": -0.18802767992019653, "rewards/margins": 0.11089573800563812, "rewards/rejected": -0.29892343282699585, "step": 3005 }, { "epoch": 8.229979466119097, "grad_norm": 3.3584654331207275, "learning_rate": 5.882191780821918e-07, "log_odds_chosen": 2.419161796569824, "log_odds_ratio": -0.23858299851417542, "logits/chosen": 0.9551590085029602, "logits/rejected": 0.9759555459022522, "logps/chosen": -2.4521989822387695, "logps/rejected": -4.782663345336914, "loss": 0.5928, "nll_loss": 0.5689805746078491, "rewards/accuracies": 0.875, "rewards/chosen": -0.2452199012041092, "rewards/margins": 0.23304642736911774, "rewards/rejected": -0.47826632857322693, "step": 3006 }, { "epoch": 8.232717316906228, "grad_norm": 3.1800711154937744, "learning_rate": 5.880821917808219e-07, "log_odds_chosen": 3.833080291748047, "log_odds_ratio": -0.1458919197320938, "logits/chosen": 0.7727092504501343, "logits/rejected": 0.8182812929153442, "logps/chosen": -1.8646845817565918, "logps/rejected": -5.539022445678711, "loss": 0.6948, "nll_loss": 0.6802589893341064, "rewards/accuracies": 1.0, "rewards/chosen": -0.1864684373140335, "rewards/margins": 0.3674338459968567, "rewards/rejected": -0.553902268409729, "step": 3007 }, { "epoch": 8.235455167693361, "grad_norm": 3.5087151527404785, "learning_rate": 5.879452054794521e-07, "log_odds_chosen": 0.9946891665458679, "log_odds_ratio": -0.4056569039821625, "logits/chosen": 0.5820635557174683, "logits/rejected": 0.5071374177932739, "logps/chosen": -1.8435636758804321, "logps/rejected": -2.7279486656188965, "loss": 0.7364, "nll_loss": 0.6958050727844238, "rewards/accuracies": 0.875, "rewards/chosen": -0.18435636162757874, "rewards/margins": 0.0884384885430336, "rewards/rejected": -0.27279484272003174, "step": 3008 }, { "epoch": 8.238193018480493, "grad_norm": 2.974738597869873, "learning_rate": 5.878082191780822e-07, "log_odds_chosen": 2.580951690673828, "log_odds_ratio": -0.1847953498363495, "logits/chosen": 1.1493961811065674, "logits/rejected": 1.1548271179199219, "logps/chosen": -1.8319470882415771, "logps/rejected": -4.261423110961914, "loss": 0.5422, "nll_loss": 0.5237168073654175, "rewards/accuracies": 1.0, "rewards/chosen": -0.18319469690322876, "rewards/margins": 0.24294763803482056, "rewards/rejected": -0.4261423349380493, "step": 3009 }, { "epoch": 8.240930869267626, "grad_norm": 3.2138633728027344, "learning_rate": 5.876712328767123e-07, "log_odds_chosen": 1.1986757516860962, "log_odds_ratio": -0.30665767192840576, "logits/chosen": 0.684495210647583, "logits/rejected": 0.6472821831703186, "logps/chosen": -1.7335482835769653, "logps/rejected": -2.7927122116088867, "loss": 0.6926, "nll_loss": 0.6619117259979248, "rewards/accuracies": 1.0, "rewards/chosen": -0.17335481941699982, "rewards/margins": 0.10591641068458557, "rewards/rejected": -0.2792712450027466, "step": 3010 }, { "epoch": 8.243668720054757, "grad_norm": 3.0573956966400146, "learning_rate": 5.875342465753425e-07, "log_odds_chosen": 1.8627562522888184, "log_odds_ratio": -0.296735942363739, "logits/chosen": 1.0473339557647705, "logits/rejected": 1.068068027496338, "logps/chosen": -1.672333002090454, "logps/rejected": -3.387033462524414, "loss": 0.5728, "nll_loss": 0.5431137084960938, "rewards/accuracies": 0.875, "rewards/chosen": -0.16723328828811646, "rewards/margins": 0.171470046043396, "rewards/rejected": -0.33870333433151245, "step": 3011 }, { "epoch": 8.24640657084189, "grad_norm": 4.894333362579346, "learning_rate": 5.873972602739726e-07, "log_odds_chosen": 1.4464045763015747, "log_odds_ratio": -0.31278643012046814, "logits/chosen": 1.002549648284912, "logits/rejected": 1.0393050909042358, "logps/chosen": -2.6985363960266113, "logps/rejected": -4.022995948791504, "loss": 0.5745, "nll_loss": 0.5432074666023254, "rewards/accuracies": 0.875, "rewards/chosen": -0.2698536515235901, "rewards/margins": 0.13244594633579254, "rewards/rejected": -0.40229955315589905, "step": 3012 }, { "epoch": 8.24914442162902, "grad_norm": 8.934776306152344, "learning_rate": 5.872602739726027e-07, "log_odds_chosen": 1.1068999767303467, "log_odds_ratio": -0.7134271860122681, "logits/chosen": 0.7543593645095825, "logits/rejected": 0.7850638628005981, "logps/chosen": -2.538815975189209, "logps/rejected": -3.58807373046875, "loss": 0.716, "nll_loss": 0.6446287631988525, "rewards/accuracies": 0.875, "rewards/chosen": -0.25388163328170776, "rewards/margins": 0.1049257442355156, "rewards/rejected": -0.35880735516548157, "step": 3013 }, { "epoch": 8.251882272416154, "grad_norm": 4.048233985900879, "learning_rate": 5.871232876712329e-07, "log_odds_chosen": 1.9724912643432617, "log_odds_ratio": -0.38281121850013733, "logits/chosen": 0.9797030687332153, "logits/rejected": 0.9578274488449097, "logps/chosen": -2.010787010192871, "logps/rejected": -3.7747297286987305, "loss": 0.7143, "nll_loss": 0.6759780645370483, "rewards/accuracies": 0.75, "rewards/chosen": -0.20107869803905487, "rewards/margins": 0.1763942837715149, "rewards/rejected": -0.37747299671173096, "step": 3014 }, { "epoch": 8.254620123203285, "grad_norm": 2.901362180709839, "learning_rate": 5.869863013698629e-07, "log_odds_chosen": 3.53163480758667, "log_odds_ratio": -0.1443125456571579, "logits/chosen": 0.8806226849555969, "logits/rejected": 0.8759067058563232, "logps/chosen": -1.5811169147491455, "logps/rejected": -4.882388114929199, "loss": 0.5942, "nll_loss": 0.5797923803329468, "rewards/accuracies": 0.875, "rewards/chosen": -0.15811169147491455, "rewards/margins": 0.33012712001800537, "rewards/rejected": -0.4882388114929199, "step": 3015 }, { "epoch": 8.257357973990418, "grad_norm": 3.82498836517334, "learning_rate": 5.868493150684931e-07, "log_odds_chosen": 3.489065647125244, "log_odds_ratio": -0.25143641233444214, "logits/chosen": 0.826312780380249, "logits/rejected": 0.7774386405944824, "logps/chosen": -1.9649524688720703, "logps/rejected": -5.266856670379639, "loss": 0.6531, "nll_loss": 0.6279568076133728, "rewards/accuracies": 0.875, "rewards/chosen": -0.19649526476860046, "rewards/margins": 0.33019042015075684, "rewards/rejected": -0.5266856551170349, "step": 3016 }, { "epoch": 8.260095824777549, "grad_norm": 3.570976972579956, "learning_rate": 5.867123287671233e-07, "log_odds_chosen": 2.7560627460479736, "log_odds_ratio": -0.16244706511497498, "logits/chosen": 0.9767311811447144, "logits/rejected": 0.9933329820632935, "logps/chosen": -1.902844786643982, "logps/rejected": -4.387205123901367, "loss": 0.6005, "nll_loss": 0.5842232704162598, "rewards/accuracies": 1.0, "rewards/chosen": -0.19028449058532715, "rewards/margins": 0.24843601882457733, "rewards/rejected": -0.4387205243110657, "step": 3017 }, { "epoch": 8.262833675564682, "grad_norm": 3.367374897003174, "learning_rate": 5.865753424657534e-07, "log_odds_chosen": 2.465085983276367, "log_odds_ratio": -0.23556718230247498, "logits/chosen": 0.855136513710022, "logits/rejected": 0.8735986351966858, "logps/chosen": -1.7334749698638916, "logps/rejected": -3.9893856048583984, "loss": 0.5967, "nll_loss": 0.5731384754180908, "rewards/accuracies": 0.875, "rewards/chosen": -0.17334750294685364, "rewards/margins": 0.22559106349945068, "rewards/rejected": -0.3989385664463043, "step": 3018 }, { "epoch": 8.265571526351813, "grad_norm": 4.341354846954346, "learning_rate": 5.864383561643835e-07, "log_odds_chosen": 0.6559768319129944, "log_odds_ratio": -0.74195396900177, "logits/chosen": 0.8316589593887329, "logits/rejected": 0.876882016658783, "logps/chosen": -2.134073257446289, "logps/rejected": -2.7533493041992188, "loss": 0.7489, "nll_loss": 0.6747252941131592, "rewards/accuracies": 0.5, "rewards/chosen": -0.21340733766555786, "rewards/margins": 0.06192762777209282, "rewards/rejected": -0.2753349542617798, "step": 3019 }, { "epoch": 8.268309377138946, "grad_norm": 2.8775136470794678, "learning_rate": 5.863013698630137e-07, "log_odds_chosen": 2.9329164028167725, "log_odds_ratio": -0.16515851020812988, "logits/chosen": 1.1286064386367798, "logits/rejected": 1.0758960247039795, "logps/chosen": -1.8989739418029785, "logps/rejected": -4.673962593078613, "loss": 0.6607, "nll_loss": 0.6441402435302734, "rewards/accuracies": 1.0, "rewards/chosen": -0.18989738821983337, "rewards/margins": 0.27749893069267273, "rewards/rejected": -0.4673963189125061, "step": 3020 }, { "epoch": 8.271047227926077, "grad_norm": 3.2783472537994385, "learning_rate": 5.861643835616438e-07, "log_odds_chosen": 2.9968738555908203, "log_odds_ratio": -0.19323992729187012, "logits/chosen": 0.8300418853759766, "logits/rejected": 0.8403892517089844, "logps/chosen": -2.254488945007324, "logps/rejected": -5.147127151489258, "loss": 0.7574, "nll_loss": 0.7380635142326355, "rewards/accuracies": 1.0, "rewards/chosen": -0.22544890642166138, "rewards/margins": 0.2892637848854065, "rewards/rejected": -0.5147126913070679, "step": 3021 }, { "epoch": 8.27378507871321, "grad_norm": 3.453458786010742, "learning_rate": 5.860273972602739e-07, "log_odds_chosen": 2.1639485359191895, "log_odds_ratio": -0.21314507722854614, "logits/chosen": 0.6722943782806396, "logits/rejected": 0.6718472242355347, "logps/chosen": -2.6090331077575684, "logps/rejected": -4.654407024383545, "loss": 0.6455, "nll_loss": 0.6241505742073059, "rewards/accuracies": 1.0, "rewards/chosen": -0.2609032988548279, "rewards/margins": 0.20453739166259766, "rewards/rejected": -0.4654407203197479, "step": 3022 }, { "epoch": 8.276522929500342, "grad_norm": 3.427081346511841, "learning_rate": 5.858904109589041e-07, "log_odds_chosen": 2.0803890228271484, "log_odds_ratio": -0.2569504678249359, "logits/chosen": 0.5200560092926025, "logits/rejected": 0.4451858699321747, "logps/chosen": -1.6057219505310059, "logps/rejected": -3.4861841201782227, "loss": 0.6967, "nll_loss": 0.6709796190261841, "rewards/accuracies": 1.0, "rewards/chosen": -0.16057218611240387, "rewards/margins": 0.18804621696472168, "rewards/rejected": -0.34861841797828674, "step": 3023 }, { "epoch": 8.279260780287474, "grad_norm": 3.0430612564086914, "learning_rate": 5.857534246575342e-07, "log_odds_chosen": 2.3388619422912598, "log_odds_ratio": -0.29229632019996643, "logits/chosen": 0.6693413853645325, "logits/rejected": 0.6552337408065796, "logps/chosen": -1.8692878484725952, "logps/rejected": -4.03571081161499, "loss": 0.7303, "nll_loss": 0.7010694146156311, "rewards/accuracies": 0.875, "rewards/chosen": -0.18692879378795624, "rewards/margins": 0.21664230525493622, "rewards/rejected": -0.40357106924057007, "step": 3024 }, { "epoch": 8.281998631074606, "grad_norm": 3.8650290966033936, "learning_rate": 5.856164383561644e-07, "log_odds_chosen": 0.9558528065681458, "log_odds_ratio": -0.3514452576637268, "logits/chosen": 0.9925737380981445, "logits/rejected": 0.9732375144958496, "logps/chosen": -2.3959648609161377, "logps/rejected": -3.2666702270507812, "loss": 0.6354, "nll_loss": 0.600247859954834, "rewards/accuracies": 1.0, "rewards/chosen": -0.23959650099277496, "rewards/margins": 0.0870705395936966, "rewards/rejected": -0.32666701078414917, "step": 3025 }, { "epoch": 8.284736481861739, "grad_norm": 4.2534637451171875, "learning_rate": 5.854794520547945e-07, "log_odds_chosen": 1.6239162683486938, "log_odds_ratio": -0.28676551580429077, "logits/chosen": 1.1610136032104492, "logits/rejected": 1.2245478630065918, "logps/chosen": -3.210050582885742, "logps/rejected": -4.770573139190674, "loss": 0.6528, "nll_loss": 0.624108612537384, "rewards/accuracies": 0.875, "rewards/chosen": -0.32100510597229004, "rewards/margins": 0.15605223178863525, "rewards/rejected": -0.4770573377609253, "step": 3026 }, { "epoch": 8.28747433264887, "grad_norm": 3.8991568088531494, "learning_rate": 5.853424657534246e-07, "log_odds_chosen": 2.489145278930664, "log_odds_ratio": -0.19144360721111298, "logits/chosen": 1.076961874961853, "logits/rejected": 1.1069159507751465, "logps/chosen": -2.707878589630127, "logps/rejected": -5.129716873168945, "loss": 0.7393, "nll_loss": 0.7201136350631714, "rewards/accuracies": 0.875, "rewards/chosen": -0.2707878351211548, "rewards/margins": 0.2421838641166687, "rewards/rejected": -0.5129717588424683, "step": 3027 }, { "epoch": 8.290212183436003, "grad_norm": 3.49702787399292, "learning_rate": 5.852054794520548e-07, "log_odds_chosen": 3.494265079498291, "log_odds_ratio": -0.14642825722694397, "logits/chosen": 0.8467240333557129, "logits/rejected": 0.8381621241569519, "logps/chosen": -2.2656476497650146, "logps/rejected": -5.524960041046143, "loss": 0.6114, "nll_loss": 0.5967220664024353, "rewards/accuracies": 1.0, "rewards/chosen": -0.22656477987766266, "rewards/margins": 0.32593125104904175, "rewards/rejected": -0.5524960160255432, "step": 3028 }, { "epoch": 8.292950034223136, "grad_norm": 3.4726898670196533, "learning_rate": 5.850684931506848e-07, "log_odds_chosen": 3.3580899238586426, "log_odds_ratio": -0.1772870272397995, "logits/chosen": 1.0163803100585938, "logits/rejected": 1.0688021183013916, "logps/chosen": -2.1722159385681152, "logps/rejected": -5.411931037902832, "loss": 0.6737, "nll_loss": 0.6560036540031433, "rewards/accuracies": 1.0, "rewards/chosen": -0.21722161769866943, "rewards/margins": 0.3239715099334717, "rewards/rejected": -0.5411931276321411, "step": 3029 }, { "epoch": 8.295687885010267, "grad_norm": 3.5222795009613037, "learning_rate": 5.84931506849315e-07, "log_odds_chosen": 2.2551956176757812, "log_odds_ratio": -0.3348795473575592, "logits/chosen": 0.8111176490783691, "logits/rejected": 0.7640581130981445, "logps/chosen": -1.2812883853912354, "logps/rejected": -3.3303587436676025, "loss": 0.7062, "nll_loss": 0.6726697683334351, "rewards/accuracies": 1.0, "rewards/chosen": -0.12812884151935577, "rewards/margins": 0.20490702986717224, "rewards/rejected": -0.3330358564853668, "step": 3030 }, { "epoch": 8.2984257357974, "grad_norm": 3.879384756088257, "learning_rate": 5.847945205479452e-07, "log_odds_chosen": 1.9379104375839233, "log_odds_ratio": -0.23150551319122314, "logits/chosen": 1.081136703491211, "logits/rejected": 1.088165521621704, "logps/chosen": -2.0763742923736572, "logps/rejected": -3.875441074371338, "loss": 0.657, "nll_loss": 0.6338818073272705, "rewards/accuracies": 1.0, "rewards/chosen": -0.20763742923736572, "rewards/margins": 0.1799066960811615, "rewards/rejected": -0.3875441253185272, "step": 3031 }, { "epoch": 8.301163586584531, "grad_norm": 3.920974016189575, "learning_rate": 5.846575342465753e-07, "log_odds_chosen": 3.0592732429504395, "log_odds_ratio": -0.2934495806694031, "logits/chosen": 0.9815160632133484, "logits/rejected": 0.8968294858932495, "logps/chosen": -1.8786556720733643, "logps/rejected": -4.5747880935668945, "loss": 0.6348, "nll_loss": 0.6054880619049072, "rewards/accuracies": 0.875, "rewards/chosen": -0.18786557018756866, "rewards/margins": 0.26961320638656616, "rewards/rejected": -0.45747876167297363, "step": 3032 }, { "epoch": 8.303901437371664, "grad_norm": 3.005289316177368, "learning_rate": 5.845205479452054e-07, "log_odds_chosen": 2.21313214302063, "log_odds_ratio": -0.24434369802474976, "logits/chosen": 1.0108658075332642, "logits/rejected": 0.9725697636604309, "logps/chosen": -1.4471367597579956, "logps/rejected": -3.3735880851745605, "loss": 0.5851, "nll_loss": 0.5606411695480347, "rewards/accuracies": 1.0, "rewards/chosen": -0.14471367001533508, "rewards/margins": 0.1926451325416565, "rewards/rejected": -0.33735883235931396, "step": 3033 }, { "epoch": 8.306639288158795, "grad_norm": 4.270702362060547, "learning_rate": 5.843835616438356e-07, "log_odds_chosen": 1.5971999168395996, "log_odds_ratio": -0.37587982416152954, "logits/chosen": 0.9824982285499573, "logits/rejected": 1.0019471645355225, "logps/chosen": -2.200857400894165, "logps/rejected": -3.7112607955932617, "loss": 0.5722, "nll_loss": 0.5346198081970215, "rewards/accuracies": 0.875, "rewards/chosen": -0.2200857549905777, "rewards/margins": 0.15104031562805176, "rewards/rejected": -0.37112605571746826, "step": 3034 }, { "epoch": 8.309377138945928, "grad_norm": 7.462213516235352, "learning_rate": 5.842465753424657e-07, "log_odds_chosen": 1.065429449081421, "log_odds_ratio": -0.6158553957939148, "logits/chosen": 0.7870514988899231, "logits/rejected": 0.8439743518829346, "logps/chosen": -2.579669713973999, "logps/rejected": -3.566779375076294, "loss": 0.7961, "nll_loss": 0.7345011234283447, "rewards/accuracies": 0.625, "rewards/chosen": -0.2579669952392578, "rewards/margins": 0.09871096163988113, "rewards/rejected": -0.35667791962623596, "step": 3035 }, { "epoch": 8.31211498973306, "grad_norm": 2.8975138664245605, "learning_rate": 5.841095890410958e-07, "log_odds_chosen": 1.740295171737671, "log_odds_ratio": -0.33347874879837036, "logits/chosen": 0.862694263458252, "logits/rejected": 0.8564431071281433, "logps/chosen": -1.8845410346984863, "logps/rejected": -3.510540723800659, "loss": 0.6141, "nll_loss": 0.580780029296875, "rewards/accuracies": 0.875, "rewards/chosen": -0.18845410645008087, "rewards/margins": 0.16259995102882385, "rewards/rejected": -0.3510540723800659, "step": 3036 }, { "epoch": 8.314852840520192, "grad_norm": 3.1763100624084473, "learning_rate": 5.83972602739726e-07, "log_odds_chosen": 1.5161256790161133, "log_odds_ratio": -0.30363309383392334, "logits/chosen": 0.9389212131500244, "logits/rejected": 0.890353798866272, "logps/chosen": -1.792008638381958, "logps/rejected": -3.1080758571624756, "loss": 0.63, "nll_loss": 0.5996153354644775, "rewards/accuracies": 1.0, "rewards/chosen": -0.17920087277889252, "rewards/margins": 0.13160674273967743, "rewards/rejected": -0.31080758571624756, "step": 3037 }, { "epoch": 8.317590691307323, "grad_norm": 3.966671943664551, "learning_rate": 5.838356164383561e-07, "log_odds_chosen": 1.5336310863494873, "log_odds_ratio": -0.25937551259994507, "logits/chosen": 0.8751792907714844, "logits/rejected": 0.8424183130264282, "logps/chosen": -1.4146575927734375, "logps/rejected": -2.730462074279785, "loss": 0.5339, "nll_loss": 0.5080059170722961, "rewards/accuracies": 1.0, "rewards/chosen": -0.14146575331687927, "rewards/margins": 0.1315804421901703, "rewards/rejected": -0.27304622530937195, "step": 3038 }, { "epoch": 8.320328542094456, "grad_norm": 3.681145429611206, "learning_rate": 5.836986301369863e-07, "log_odds_chosen": 1.282397747039795, "log_odds_ratio": -0.31646305322647095, "logits/chosen": 0.6548173427581787, "logits/rejected": 0.5372676253318787, "logps/chosen": -1.7412513494491577, "logps/rejected": -2.872468948364258, "loss": 0.6545, "nll_loss": 0.6228411197662354, "rewards/accuracies": 1.0, "rewards/chosen": -0.17412513494491577, "rewards/margins": 0.11312174797058105, "rewards/rejected": -0.2872468829154968, "step": 3039 }, { "epoch": 8.323066392881588, "grad_norm": 4.217599391937256, "learning_rate": 5.835616438356164e-07, "log_odds_chosen": 2.4133405685424805, "log_odds_ratio": -0.30229640007019043, "logits/chosen": 1.1074999570846558, "logits/rejected": 1.0290288925170898, "logps/chosen": -1.9899253845214844, "logps/rejected": -4.267975807189941, "loss": 0.6452, "nll_loss": 0.6149663925170898, "rewards/accuracies": 1.0, "rewards/chosen": -0.1989925354719162, "rewards/margins": 0.22780507802963257, "rewards/rejected": -0.4267975986003876, "step": 3040 }, { "epoch": 8.32580424366872, "grad_norm": 3.328014850616455, "learning_rate": 5.834246575342465e-07, "log_odds_chosen": 1.6438008546829224, "log_odds_ratio": -0.2614983916282654, "logits/chosen": 0.7762401103973389, "logits/rejected": 0.6859453916549683, "logps/chosen": -2.056849718093872, "logps/rejected": -3.601044178009033, "loss": 0.6693, "nll_loss": 0.643121600151062, "rewards/accuracies": 0.875, "rewards/chosen": -0.20568498969078064, "rewards/margins": 0.1544194370508194, "rewards/rejected": -0.36010444164276123, "step": 3041 }, { "epoch": 8.328542094455852, "grad_norm": 2.9289917945861816, "learning_rate": 5.832876712328767e-07, "log_odds_chosen": 4.118279933929443, "log_odds_ratio": -0.2033705711364746, "logits/chosen": 0.5716865658760071, "logits/rejected": 0.5345335006713867, "logps/chosen": -1.9728529453277588, "logps/rejected": -5.950724124908447, "loss": 0.6869, "nll_loss": 0.6665868759155273, "rewards/accuracies": 1.0, "rewards/chosen": -0.19728532433509827, "rewards/margins": 0.39778709411621094, "rewards/rejected": -0.5950723886489868, "step": 3042 }, { "epoch": 8.331279945242985, "grad_norm": 3.5365872383117676, "learning_rate": 5.831506849315068e-07, "log_odds_chosen": 3.376434803009033, "log_odds_ratio": -0.1461002230644226, "logits/chosen": 0.894895076751709, "logits/rejected": 0.9032014012336731, "logps/chosen": -2.0368595123291016, "logps/rejected": -5.118139743804932, "loss": 0.5701, "nll_loss": 0.5554696321487427, "rewards/accuracies": 1.0, "rewards/chosen": -0.2036859691143036, "rewards/margins": 0.3081279993057251, "rewards/rejected": -0.5118139982223511, "step": 3043 }, { "epoch": 8.334017796030116, "grad_norm": 3.1600496768951416, "learning_rate": 5.830136986301369e-07, "log_odds_chosen": 2.487135648727417, "log_odds_ratio": -0.287651002407074, "logits/chosen": 0.7999086976051331, "logits/rejected": 0.7912290096282959, "logps/chosen": -1.7664244174957275, "logps/rejected": -4.120819091796875, "loss": 0.7127, "nll_loss": 0.6838992834091187, "rewards/accuracies": 1.0, "rewards/chosen": -0.17664241790771484, "rewards/margins": 0.2354394644498825, "rewards/rejected": -0.41208189725875854, "step": 3044 }, { "epoch": 8.336755646817249, "grad_norm": 5.416269302368164, "learning_rate": 5.828767123287671e-07, "log_odds_chosen": 3.1773343086242676, "log_odds_ratio": -0.5300644636154175, "logits/chosen": 1.193885087966919, "logits/rejected": 1.2353556156158447, "logps/chosen": -2.778235912322998, "logps/rejected": -5.692549705505371, "loss": 0.7539, "nll_loss": 0.7008640766143799, "rewards/accuracies": 0.75, "rewards/chosen": -0.2778235971927643, "rewards/margins": 0.29143136739730835, "rewards/rejected": -0.569254994392395, "step": 3045 }, { "epoch": 8.33949349760438, "grad_norm": 3.3039891719818115, "learning_rate": 5.827397260273972e-07, "log_odds_chosen": 5.356414794921875, "log_odds_ratio": -0.03746877238154411, "logits/chosen": 1.2549726963043213, "logits/rejected": 1.3170325756072998, "logps/chosen": -1.8536500930786133, "logps/rejected": -7.021343231201172, "loss": 0.6155, "nll_loss": 0.6117533445358276, "rewards/accuracies": 1.0, "rewards/chosen": -0.1853650063276291, "rewards/margins": 0.5167694091796875, "rewards/rejected": -0.702134370803833, "step": 3046 }, { "epoch": 8.342231348391513, "grad_norm": 3.1495330333709717, "learning_rate": 5.826027397260273e-07, "log_odds_chosen": 3.7560431957244873, "log_odds_ratio": -0.11856628954410553, "logits/chosen": 0.7704240083694458, "logits/rejected": 0.6739927530288696, "logps/chosen": -1.4399547576904297, "logps/rejected": -4.892064094543457, "loss": 0.6589, "nll_loss": 0.6470221281051636, "rewards/accuracies": 1.0, "rewards/chosen": -0.1439954936504364, "rewards/margins": 0.3452109396457672, "rewards/rejected": -0.4892064332962036, "step": 3047 }, { "epoch": 8.344969199178644, "grad_norm": 12.385416984558105, "learning_rate": 5.824657534246575e-07, "log_odds_chosen": -0.06518027186393738, "log_odds_ratio": -1.1768381595611572, "logits/chosen": 0.6007270216941833, "logits/rejected": 0.5462700724601746, "logps/chosen": -3.071220636367798, "logps/rejected": -2.964341402053833, "loss": 0.8013, "nll_loss": 0.683630645275116, "rewards/accuracies": 0.375, "rewards/chosen": -0.30712205171585083, "rewards/margins": -0.010687913745641708, "rewards/rejected": -0.2964341640472412, "step": 3048 }, { "epoch": 8.347707049965777, "grad_norm": 6.0057854652404785, "learning_rate": 5.823287671232876e-07, "log_odds_chosen": 2.9648587703704834, "log_odds_ratio": -0.24238522350788116, "logits/chosen": 1.1245684623718262, "logits/rejected": 1.098983883857727, "logps/chosen": -2.0803184509277344, "logps/rejected": -4.94355583190918, "loss": 0.7154, "nll_loss": 0.6911493539810181, "rewards/accuracies": 0.875, "rewards/chosen": -0.20803186297416687, "rewards/margins": 0.28632375597953796, "rewards/rejected": -0.49435561895370483, "step": 3049 }, { "epoch": 8.350444900752908, "grad_norm": 3.087675094604492, "learning_rate": 5.821917808219177e-07, "log_odds_chosen": 1.964727520942688, "log_odds_ratio": -0.20480839908123016, "logits/chosen": 0.8097463846206665, "logits/rejected": 0.7776744365692139, "logps/chosen": -1.4133074283599854, "logps/rejected": -3.1170639991760254, "loss": 0.6121, "nll_loss": 0.5915839672088623, "rewards/accuracies": 1.0, "rewards/chosen": -0.14133073389530182, "rewards/margins": 0.17037567496299744, "rewards/rejected": -0.31170642375946045, "step": 3050 }, { "epoch": 8.353182751540041, "grad_norm": 3.3221545219421387, "learning_rate": 5.820547945205479e-07, "log_odds_chosen": 1.9392483234405518, "log_odds_ratio": -0.318097859621048, "logits/chosen": 0.902276337146759, "logits/rejected": 0.836315929889679, "logps/chosen": -1.4617501497268677, "logps/rejected": -3.223176956176758, "loss": 0.5606, "nll_loss": 0.5287792682647705, "rewards/accuracies": 1.0, "rewards/chosen": -0.14617502689361572, "rewards/margins": 0.17614266276359558, "rewards/rejected": -0.3223177194595337, "step": 3051 }, { "epoch": 8.355920602327172, "grad_norm": 3.262361526489258, "learning_rate": 5.81917808219178e-07, "log_odds_chosen": 3.0035276412963867, "log_odds_ratio": -0.14666211605072021, "logits/chosen": 0.7892049551010132, "logits/rejected": 0.7047415971755981, "logps/chosen": -1.3708174228668213, "logps/rejected": -4.12114143371582, "loss": 0.635, "nll_loss": 0.6203368902206421, "rewards/accuracies": 1.0, "rewards/chosen": -0.13708174228668213, "rewards/margins": 0.2750324010848999, "rewards/rejected": -0.41211414337158203, "step": 3052 }, { "epoch": 8.358658453114305, "grad_norm": 4.067693710327148, "learning_rate": 5.817808219178082e-07, "log_odds_chosen": 1.7869216203689575, "log_odds_ratio": -0.28115904331207275, "logits/chosen": 1.0740717649459839, "logits/rejected": 1.0801314115524292, "logps/chosen": -2.081223249435425, "logps/rejected": -3.7564406394958496, "loss": 0.6187, "nll_loss": 0.59062659740448, "rewards/accuracies": 1.0, "rewards/chosen": -0.2081223428249359, "rewards/margins": 0.16752175986766815, "rewards/rejected": -0.37564408779144287, "step": 3053 }, { "epoch": 8.361396303901437, "grad_norm": 3.203552007675171, "learning_rate": 5.816438356164383e-07, "log_odds_chosen": 1.7459654808044434, "log_odds_ratio": -0.3816983103752136, "logits/chosen": 0.7227994203567505, "logits/rejected": 0.6875051259994507, "logps/chosen": -2.03324556350708, "logps/rejected": -3.7054684162139893, "loss": 0.7311, "nll_loss": 0.6929529905319214, "rewards/accuracies": 0.625, "rewards/chosen": -0.203324556350708, "rewards/margins": 0.167222261428833, "rewards/rejected": -0.3705468475818634, "step": 3054 }, { "epoch": 8.36413415468857, "grad_norm": 3.310048818588257, "learning_rate": 5.815068493150684e-07, "log_odds_chosen": 3.400437116622925, "log_odds_ratio": -0.18728995323181152, "logits/chosen": 0.8278198838233948, "logits/rejected": 0.80082106590271, "logps/chosen": -1.7174869775772095, "logps/rejected": -4.9230265617370605, "loss": 0.63, "nll_loss": 0.6113065481185913, "rewards/accuracies": 1.0, "rewards/chosen": -0.17174869775772095, "rewards/margins": 0.3205539584159851, "rewards/rejected": -0.49230265617370605, "step": 3055 }, { "epoch": 8.366872005475702, "grad_norm": 3.314056634902954, "learning_rate": 5.813698630136986e-07, "log_odds_chosen": 2.140360116958618, "log_odds_ratio": -0.16848266124725342, "logits/chosen": 0.5295863151550293, "logits/rejected": 0.41537103056907654, "logps/chosen": -1.3689671754837036, "logps/rejected": -3.256582260131836, "loss": 0.636, "nll_loss": 0.6191773414611816, "rewards/accuracies": 1.0, "rewards/chosen": -0.13689672946929932, "rewards/margins": 0.18876150250434875, "rewards/rejected": -0.32565823197364807, "step": 3056 }, { "epoch": 8.369609856262834, "grad_norm": 3.224280595779419, "learning_rate": 5.812328767123287e-07, "log_odds_chosen": 1.4408657550811768, "log_odds_ratio": -0.30820804834365845, "logits/chosen": 0.9711053967475891, "logits/rejected": 0.9562248587608337, "logps/chosen": -1.5921690464019775, "logps/rejected": -2.8322556018829346, "loss": 0.5764, "nll_loss": 0.5456031560897827, "rewards/accuracies": 1.0, "rewards/chosen": -0.15921691060066223, "rewards/margins": 0.1240086555480957, "rewards/rejected": -0.28322556614875793, "step": 3057 }, { "epoch": 8.372347707049967, "grad_norm": 3.5055036544799805, "learning_rate": 5.810958904109588e-07, "log_odds_chosen": 1.5251339673995972, "log_odds_ratio": -0.3251259922981262, "logits/chosen": 0.5024486780166626, "logits/rejected": 0.4757946729660034, "logps/chosen": -1.9757009744644165, "logps/rejected": -3.4136061668395996, "loss": 0.6387, "nll_loss": 0.6061879992485046, "rewards/accuracies": 0.875, "rewards/chosen": -0.1975700855255127, "rewards/margins": 0.14379051327705383, "rewards/rejected": -0.3413606286048889, "step": 3058 }, { "epoch": 8.375085557837098, "grad_norm": 3.3270962238311768, "learning_rate": 5.80958904109589e-07, "log_odds_chosen": 1.1736910343170166, "log_odds_ratio": -0.40129682421684265, "logits/chosen": 0.695307731628418, "logits/rejected": 0.6780605912208557, "logps/chosen": -1.8130310773849487, "logps/rejected": -2.8534231185913086, "loss": 0.6271, "nll_loss": 0.586987316608429, "rewards/accuracies": 0.75, "rewards/chosen": -0.18130309879779816, "rewards/margins": 0.10403919965028763, "rewards/rejected": -0.2853423058986664, "step": 3059 }, { "epoch": 8.37782340862423, "grad_norm": 2.9221136569976807, "learning_rate": 5.808219178082191e-07, "log_odds_chosen": 1.8561866283416748, "log_odds_ratio": -0.3244907259941101, "logits/chosen": 0.8639050722122192, "logits/rejected": 0.8647921085357666, "logps/chosen": -2.560858726501465, "logps/rejected": -4.366884708404541, "loss": 0.6436, "nll_loss": 0.6111737489700317, "rewards/accuracies": 0.875, "rewards/chosen": -0.2560858726501465, "rewards/margins": 0.18060258030891418, "rewards/rejected": -0.43668848276138306, "step": 3060 }, { "epoch": 8.380561259411362, "grad_norm": 5.071935653686523, "learning_rate": 5.806849315068492e-07, "log_odds_chosen": 2.0490455627441406, "log_odds_ratio": -0.7207313776016235, "logits/chosen": 0.985350489616394, "logits/rejected": 0.9120067358016968, "logps/chosen": -2.4444990158081055, "logps/rejected": -4.439804553985596, "loss": 0.706, "nll_loss": 0.6339278221130371, "rewards/accuracies": 0.75, "rewards/chosen": -0.2444498986005783, "rewards/margins": 0.19953054189682007, "rewards/rejected": -0.44398045539855957, "step": 3061 }, { "epoch": 8.383299110198495, "grad_norm": 4.156285285949707, "learning_rate": 5.805479452054795e-07, "log_odds_chosen": 2.229672431945801, "log_odds_ratio": -0.5908890962600708, "logits/chosen": 0.5897194147109985, "logits/rejected": 0.5791192650794983, "logps/chosen": -2.406461238861084, "logps/rejected": -4.55682897567749, "loss": 0.7076, "nll_loss": 0.6485484838485718, "rewards/accuracies": 0.875, "rewards/chosen": -0.2406461238861084, "rewards/margins": 0.21503682434558868, "rewards/rejected": -0.4556829333305359, "step": 3062 }, { "epoch": 8.386036960985626, "grad_norm": 3.1202595233917236, "learning_rate": 5.804109589041095e-07, "log_odds_chosen": 2.581132411956787, "log_odds_ratio": -0.18607178330421448, "logits/chosen": 0.8115748167037964, "logits/rejected": 0.7242305874824524, "logps/chosen": -1.6088292598724365, "logps/rejected": -3.9408352375030518, "loss": 0.6109, "nll_loss": 0.5922696590423584, "rewards/accuracies": 1.0, "rewards/chosen": -0.16088293492794037, "rewards/margins": 0.23320059478282928, "rewards/rejected": -0.39408349990844727, "step": 3063 }, { "epoch": 8.388774811772759, "grad_norm": 3.0738964080810547, "learning_rate": 5.802739726027396e-07, "log_odds_chosen": 3.504295825958252, "log_odds_ratio": -0.2708284854888916, "logits/chosen": 1.0780386924743652, "logits/rejected": 1.0200400352478027, "logps/chosen": -1.642545223236084, "logps/rejected": -4.949483394622803, "loss": 0.6977, "nll_loss": 0.6706128120422363, "rewards/accuracies": 0.875, "rewards/chosen": -0.16425451636314392, "rewards/margins": 0.3306938409805298, "rewards/rejected": -0.4949483275413513, "step": 3064 }, { "epoch": 8.39151266255989, "grad_norm": 6.277449607849121, "learning_rate": 5.801369863013699e-07, "log_odds_chosen": 1.5581238269805908, "log_odds_ratio": -0.5078384280204773, "logits/chosen": 0.6811613440513611, "logits/rejected": 0.6150915026664734, "logps/chosen": -1.7629714012145996, "logps/rejected": -3.1961488723754883, "loss": 0.6378, "nll_loss": 0.5870539546012878, "rewards/accuracies": 0.75, "rewards/chosen": -0.176297128200531, "rewards/margins": 0.14331777393817902, "rewards/rejected": -0.3196149170398712, "step": 3065 }, { "epoch": 8.394250513347023, "grad_norm": 4.530598163604736, "learning_rate": 5.8e-07, "log_odds_chosen": 3.8134496212005615, "log_odds_ratio": -0.21680741012096405, "logits/chosen": 0.9642048478126526, "logits/rejected": 1.0290582180023193, "logps/chosen": -2.6143360137939453, "logps/rejected": -6.317904472351074, "loss": 0.7422, "nll_loss": 0.7205109596252441, "rewards/accuracies": 0.875, "rewards/chosen": -0.26143360137939453, "rewards/margins": 0.37035685777664185, "rewards/rejected": -0.6317905187606812, "step": 3066 }, { "epoch": 8.396988364134154, "grad_norm": 3.4642229080200195, "learning_rate": 5.798630136986302e-07, "log_odds_chosen": 2.275439739227295, "log_odds_ratio": -0.2858530282974243, "logits/chosen": 0.9068977236747742, "logits/rejected": 0.8744752407073975, "logps/chosen": -2.123528242111206, "logps/rejected": -4.256279468536377, "loss": 0.7529, "nll_loss": 0.7243250608444214, "rewards/accuracies": 0.75, "rewards/chosen": -0.21235282719135284, "rewards/margins": 0.21327514946460724, "rewards/rejected": -0.4256279766559601, "step": 3067 }, { "epoch": 8.399726214921287, "grad_norm": 3.7233834266662598, "learning_rate": 5.797260273972603e-07, "log_odds_chosen": 2.035299777984619, "log_odds_ratio": -0.26388445496559143, "logits/chosen": 0.7359892129898071, "logits/rejected": 0.6717519164085388, "logps/chosen": -1.501852035522461, "logps/rejected": -3.3354148864746094, "loss": 0.6163, "nll_loss": 0.5899388790130615, "rewards/accuracies": 1.0, "rewards/chosen": -0.15018519759178162, "rewards/margins": 0.18335629999637604, "rewards/rejected": -0.33354151248931885, "step": 3068 }, { "epoch": 8.402464065708418, "grad_norm": 3.058497190475464, "learning_rate": 5.795890410958904e-07, "log_odds_chosen": 2.952787160873413, "log_odds_ratio": -0.19139829277992249, "logits/chosen": 0.6693960428237915, "logits/rejected": 0.7195366024971008, "logps/chosen": -1.5222601890563965, "logps/rejected": -4.208710670471191, "loss": 0.6591, "nll_loss": 0.6399897336959839, "rewards/accuracies": 1.0, "rewards/chosen": -0.1522260159254074, "rewards/margins": 0.2686450481414795, "rewards/rejected": -0.4208710789680481, "step": 3069 }, { "epoch": 8.405201916495551, "grad_norm": 3.3890180587768555, "learning_rate": 5.794520547945206e-07, "log_odds_chosen": 1.5191055536270142, "log_odds_ratio": -0.3064554035663605, "logits/chosen": 0.7383834719657898, "logits/rejected": 0.646500289440155, "logps/chosen": -2.28719162940979, "logps/rejected": -3.691559314727783, "loss": 0.678, "nll_loss": 0.6473778486251831, "rewards/accuracies": 0.75, "rewards/chosen": -0.22871917486190796, "rewards/margins": 0.14043676853179932, "rewards/rejected": -0.3691559433937073, "step": 3070 }, { "epoch": 8.407939767282683, "grad_norm": 3.88301944732666, "learning_rate": 5.793150684931507e-07, "log_odds_chosen": 1.674478530883789, "log_odds_ratio": -0.32268714904785156, "logits/chosen": 0.717717170715332, "logits/rejected": 0.6240893006324768, "logps/chosen": -1.3683961629867554, "logps/rejected": -2.883028030395508, "loss": 0.7102, "nll_loss": 0.6779488325119019, "rewards/accuracies": 0.875, "rewards/chosen": -0.1368396282196045, "rewards/margins": 0.15146319568157196, "rewards/rejected": -0.28830283880233765, "step": 3071 }, { "epoch": 8.410677618069816, "grad_norm": 3.3764984607696533, "learning_rate": 5.791780821917808e-07, "log_odds_chosen": 1.7068121433258057, "log_odds_ratio": -0.25987470149993896, "logits/chosen": 0.800693929195404, "logits/rejected": 0.7269355058670044, "logps/chosen": -1.6460800170898438, "logps/rejected": -3.126570701599121, "loss": 0.6125, "nll_loss": 0.5865160822868347, "rewards/accuracies": 1.0, "rewards/chosen": -0.16460800170898438, "rewards/margins": 0.14804908633232117, "rewards/rejected": -0.31265708804130554, "step": 3072 }, { "epoch": 8.413415468856947, "grad_norm": 3.6068241596221924, "learning_rate": 5.79041095890411e-07, "log_odds_chosen": 2.019116163253784, "log_odds_ratio": -0.2689305543899536, "logits/chosen": 0.8665017485618591, "logits/rejected": 0.8210824728012085, "logps/chosen": -1.9296917915344238, "logps/rejected": -3.758802652359009, "loss": 0.5681, "nll_loss": 0.5411812663078308, "rewards/accuracies": 1.0, "rewards/chosen": -0.1929691731929779, "rewards/margins": 0.18291108310222626, "rewards/rejected": -0.37588024139404297, "step": 3073 }, { "epoch": 8.41615331964408, "grad_norm": 3.3691964149475098, "learning_rate": 5.789041095890412e-07, "log_odds_chosen": 2.93809175491333, "log_odds_ratio": -0.25967568159103394, "logits/chosen": 0.7963718771934509, "logits/rejected": 0.7455029487609863, "logps/chosen": -1.988645076751709, "logps/rejected": -4.7626543045043945, "loss": 0.6996, "nll_loss": 0.6736273765563965, "rewards/accuracies": 1.0, "rewards/chosen": -0.19886453449726105, "rewards/margins": 0.2774008810520172, "rewards/rejected": -0.47626543045043945, "step": 3074 }, { "epoch": 8.41889117043121, "grad_norm": 4.536793231964111, "learning_rate": 5.787671232876712e-07, "log_odds_chosen": 1.0089757442474365, "log_odds_ratio": -0.6217228174209595, "logits/chosen": 0.8580519556999207, "logits/rejected": 0.7858104109764099, "logps/chosen": -2.5681650638580322, "logps/rejected": -3.482424259185791, "loss": 0.6987, "nll_loss": 0.6364940404891968, "rewards/accuracies": 0.625, "rewards/chosen": -0.2568165063858032, "rewards/margins": 0.09142593294382095, "rewards/rejected": -0.3482424020767212, "step": 3075 }, { "epoch": 8.421629021218344, "grad_norm": 3.0774614810943604, "learning_rate": 5.786301369863014e-07, "log_odds_chosen": 3.2252068519592285, "log_odds_ratio": -0.18932217359542847, "logits/chosen": 0.7980287075042725, "logits/rejected": 0.717244565486908, "logps/chosen": -1.5196003913879395, "logps/rejected": -4.44175910949707, "loss": 0.6376, "nll_loss": 0.6186861395835876, "rewards/accuracies": 0.875, "rewards/chosen": -0.15196003019809723, "rewards/margins": 0.29221588373184204, "rewards/rejected": -0.44417592883110046, "step": 3076 }, { "epoch": 8.424366872005475, "grad_norm": 3.725334644317627, "learning_rate": 5.784931506849315e-07, "log_odds_chosen": 1.7052526473999023, "log_odds_ratio": -0.3995075225830078, "logits/chosen": 0.7182216048240662, "logits/rejected": 0.6593365669250488, "logps/chosen": -2.380415678024292, "logps/rejected": -4.016910552978516, "loss": 0.6626, "nll_loss": 0.6226272583007812, "rewards/accuracies": 0.75, "rewards/chosen": -0.23804157972335815, "rewards/margins": 0.16364949941635132, "rewards/rejected": -0.4016910791397095, "step": 3077 }, { "epoch": 8.427104722792608, "grad_norm": 5.173285484313965, "learning_rate": 5.783561643835616e-07, "log_odds_chosen": 2.5783963203430176, "log_odds_ratio": -0.2452869564294815, "logits/chosen": 0.6799720525741577, "logits/rejected": 0.6788665056228638, "logps/chosen": -1.9756439924240112, "logps/rejected": -4.389291763305664, "loss": 0.6945, "nll_loss": 0.6699291467666626, "rewards/accuracies": 1.0, "rewards/chosen": -0.19756439328193665, "rewards/margins": 0.24136477708816528, "rewards/rejected": -0.4389292001724243, "step": 3078 }, { "epoch": 8.429842573579739, "grad_norm": 3.30611515045166, "learning_rate": 5.782191780821918e-07, "log_odds_chosen": 3.9880452156066895, "log_odds_ratio": -0.12012729048728943, "logits/chosen": 1.064884066581726, "logits/rejected": 1.1139460802078247, "logps/chosen": -1.6005617380142212, "logps/rejected": -5.3191328048706055, "loss": 0.578, "nll_loss": 0.5659856796264648, "rewards/accuracies": 1.0, "rewards/chosen": -0.16005617380142212, "rewards/margins": 0.3718571364879608, "rewards/rejected": -0.5319132804870605, "step": 3079 }, { "epoch": 8.432580424366872, "grad_norm": 3.441826105117798, "learning_rate": 5.780821917808219e-07, "log_odds_chosen": 0.4979970157146454, "log_odds_ratio": -0.5451549291610718, "logits/chosen": 0.45347732305526733, "logits/rejected": 0.5583781003952026, "logps/chosen": -1.7957277297973633, "logps/rejected": -2.255727767944336, "loss": 0.6866, "nll_loss": 0.6321055293083191, "rewards/accuracies": 0.625, "rewards/chosen": -0.17957279086112976, "rewards/margins": 0.04599998891353607, "rewards/rejected": -0.22557277977466583, "step": 3080 }, { "epoch": 8.435318275154003, "grad_norm": 4.35371732711792, "learning_rate": 5.779452054794521e-07, "log_odds_chosen": 1.3984434604644775, "log_odds_ratio": -0.3555459976196289, "logits/chosen": 0.8866639733314514, "logits/rejected": 0.8493393659591675, "logps/chosen": -1.4558625221252441, "logps/rejected": -2.695197105407715, "loss": 0.5843, "nll_loss": 0.5487885475158691, "rewards/accuracies": 0.875, "rewards/chosen": -0.14558625221252441, "rewards/margins": 0.12393344193696976, "rewards/rejected": -0.2695196866989136, "step": 3081 }, { "epoch": 8.438056125941136, "grad_norm": 3.1074743270874023, "learning_rate": 5.778082191780822e-07, "log_odds_chosen": 3.0316927433013916, "log_odds_ratio": -0.21573734283447266, "logits/chosen": 0.7045693397521973, "logits/rejected": 0.665095329284668, "logps/chosen": -2.123196601867676, "logps/rejected": -5.009018421173096, "loss": 0.7308, "nll_loss": 0.7091838121414185, "rewards/accuracies": 0.875, "rewards/chosen": -0.21231967210769653, "rewards/margins": 0.2885821759700775, "rewards/rejected": -0.5009018778800964, "step": 3082 }, { "epoch": 8.44079397672827, "grad_norm": 3.949934244155884, "learning_rate": 5.776712328767123e-07, "log_odds_chosen": 2.1572155952453613, "log_odds_ratio": -0.29510021209716797, "logits/chosen": 1.0659738779067993, "logits/rejected": 1.0602376461029053, "logps/chosen": -1.775595784187317, "logps/rejected": -3.7377982139587402, "loss": 0.6211, "nll_loss": 0.5916215181350708, "rewards/accuracies": 0.875, "rewards/chosen": -0.17755958437919617, "rewards/margins": 0.1962202489376068, "rewards/rejected": -0.373779833316803, "step": 3083 }, { "epoch": 8.4435318275154, "grad_norm": 2.988966464996338, "learning_rate": 5.775342465753425e-07, "log_odds_chosen": 2.327819347381592, "log_odds_ratio": -0.2705535292625427, "logits/chosen": 0.846906840801239, "logits/rejected": 0.8218387365341187, "logps/chosen": -1.7998738288879395, "logps/rejected": -3.9400086402893066, "loss": 0.5582, "nll_loss": 0.5311431884765625, "rewards/accuracies": 0.875, "rewards/chosen": -0.179987370967865, "rewards/margins": 0.2140135020017624, "rewards/rejected": -0.3940008878707886, "step": 3084 }, { "epoch": 8.446269678302533, "grad_norm": 3.453885555267334, "learning_rate": 5.773972602739726e-07, "log_odds_chosen": 2.7689785957336426, "log_odds_ratio": -0.22850646078586578, "logits/chosen": 0.71601402759552, "logits/rejected": 0.6028590798377991, "logps/chosen": -2.0892345905303955, "logps/rejected": -4.721608638763428, "loss": 0.616, "nll_loss": 0.5931136012077332, "rewards/accuracies": 1.0, "rewards/chosen": -0.20892344415187836, "rewards/margins": 0.2632374167442322, "rewards/rejected": -0.47216087579727173, "step": 3085 }, { "epoch": 8.449007529089664, "grad_norm": 4.342767715454102, "learning_rate": 5.772602739726027e-07, "log_odds_chosen": 0.9594491720199585, "log_odds_ratio": -0.45274603366851807, "logits/chosen": 0.7364251613616943, "logits/rejected": 0.7339774370193481, "logps/chosen": -2.4234683513641357, "logps/rejected": -3.2830283641815186, "loss": 0.7135, "nll_loss": 0.668245792388916, "rewards/accuracies": 0.875, "rewards/chosen": -0.2423468381166458, "rewards/margins": 0.08595597743988037, "rewards/rejected": -0.32830286026000977, "step": 3086 }, { "epoch": 8.451745379876797, "grad_norm": 4.898403167724609, "learning_rate": 5.771232876712329e-07, "log_odds_chosen": 2.132896900177002, "log_odds_ratio": -0.3615337312221527, "logits/chosen": 1.2253769636154175, "logits/rejected": 1.3240957260131836, "logps/chosen": -2.9589407444000244, "logps/rejected": -4.88045597076416, "loss": 0.6495, "nll_loss": 0.6133721470832825, "rewards/accuracies": 0.875, "rewards/chosen": -0.2958940863609314, "rewards/margins": 0.19215154647827148, "rewards/rejected": -0.4880456328392029, "step": 3087 }, { "epoch": 8.454483230663929, "grad_norm": 4.473570346832275, "learning_rate": 5.769863013698631e-07, "log_odds_chosen": 2.3655240535736084, "log_odds_ratio": -0.3119088411331177, "logits/chosen": 0.7962612509727478, "logits/rejected": 0.7363371253013611, "logps/chosen": -1.5062341690063477, "logps/rejected": -3.6538519859313965, "loss": 0.6543, "nll_loss": 0.6230699419975281, "rewards/accuracies": 0.875, "rewards/chosen": -0.1506234109401703, "rewards/margins": 0.21476179361343384, "rewards/rejected": -0.3653852343559265, "step": 3088 }, { "epoch": 8.457221081451062, "grad_norm": 3.3881895542144775, "learning_rate": 5.768493150684931e-07, "log_odds_chosen": 1.9544594287872314, "log_odds_ratio": -0.18454387784004211, "logits/chosen": 0.8401486873626709, "logits/rejected": 0.8343268036842346, "logps/chosen": -1.6235828399658203, "logps/rejected": -3.378620147705078, "loss": 0.6013, "nll_loss": 0.5828863382339478, "rewards/accuracies": 1.0, "rewards/chosen": -0.16235828399658203, "rewards/margins": 0.17550374567508698, "rewards/rejected": -0.3378620147705078, "step": 3089 }, { "epoch": 8.459958932238193, "grad_norm": 3.2869906425476074, "learning_rate": 5.767123287671233e-07, "log_odds_chosen": 1.3305081129074097, "log_odds_ratio": -0.35556864738464355, "logits/chosen": 0.8169575929641724, "logits/rejected": 0.8441210389137268, "logps/chosen": -2.001880407333374, "logps/rejected": -3.181495428085327, "loss": 0.6614, "nll_loss": 0.625866174697876, "rewards/accuracies": 0.875, "rewards/chosen": -0.2001880556344986, "rewards/margins": 0.11796149611473083, "rewards/rejected": -0.3181495666503906, "step": 3090 }, { "epoch": 8.462696783025326, "grad_norm": 3.666231632232666, "learning_rate": 5.765753424657534e-07, "log_odds_chosen": 3.571929693222046, "log_odds_ratio": -0.12576794624328613, "logits/chosen": 0.9196552038192749, "logits/rejected": 0.9526489973068237, "logps/chosen": -2.856365203857422, "logps/rejected": -6.355915546417236, "loss": 0.7793, "nll_loss": 0.7666949033737183, "rewards/accuracies": 1.0, "rewards/chosen": -0.2856365144252777, "rewards/margins": 0.3499550223350525, "rewards/rejected": -0.6355915069580078, "step": 3091 }, { "epoch": 8.465434633812457, "grad_norm": 3.3865818977355957, "learning_rate": 5.764383561643835e-07, "log_odds_chosen": 2.9299936294555664, "log_odds_ratio": -0.1438441276550293, "logits/chosen": 0.8916741609573364, "logits/rejected": 0.896015465259552, "logps/chosen": -1.7854747772216797, "logps/rejected": -4.526400089263916, "loss": 0.68, "nll_loss": 0.6656023263931274, "rewards/accuracies": 1.0, "rewards/chosen": -0.1785474717617035, "rewards/margins": 0.27409252524375916, "rewards/rejected": -0.45263999700546265, "step": 3092 }, { "epoch": 8.46817248459959, "grad_norm": 3.0157113075256348, "learning_rate": 5.763013698630137e-07, "log_odds_chosen": 2.4771056175231934, "log_odds_ratio": -0.19088968634605408, "logits/chosen": 0.8731653094291687, "logits/rejected": 0.8670642375946045, "logps/chosen": -2.3862876892089844, "logps/rejected": -4.728116512298584, "loss": 0.6475, "nll_loss": 0.628447413444519, "rewards/accuracies": 1.0, "rewards/chosen": -0.23862877488136292, "rewards/margins": 0.2341829240322113, "rewards/rejected": -0.4728116989135742, "step": 3093 }, { "epoch": 8.470910335386721, "grad_norm": 3.3210880756378174, "learning_rate": 5.761643835616438e-07, "log_odds_chosen": 1.576488733291626, "log_odds_ratio": -0.24901477992534637, "logits/chosen": 0.8913544416427612, "logits/rejected": 0.8437372446060181, "logps/chosen": -1.2838678359985352, "logps/rejected": -2.630363702774048, "loss": 0.5416, "nll_loss": 0.5166904926300049, "rewards/accuracies": 1.0, "rewards/chosen": -0.12838679552078247, "rewards/margins": 0.13464957475662231, "rewards/rejected": -0.2630363702774048, "step": 3094 }, { "epoch": 8.473648186173854, "grad_norm": 3.1105034351348877, "learning_rate": 5.76027397260274e-07, "log_odds_chosen": 2.848182201385498, "log_odds_ratio": -0.21122458577156067, "logits/chosen": 1.020007848739624, "logits/rejected": 1.0152299404144287, "logps/chosen": -2.402132749557495, "logps/rejected": -5.128059387207031, "loss": 0.6215, "nll_loss": 0.6003805994987488, "rewards/accuracies": 0.875, "rewards/chosen": -0.24021326005458832, "rewards/margins": 0.2725927233695984, "rewards/rejected": -0.5128059983253479, "step": 3095 }, { "epoch": 8.476386036960985, "grad_norm": 3.5398941040039062, "learning_rate": 5.758904109589041e-07, "log_odds_chosen": 1.1276211738586426, "log_odds_ratio": -0.3237919807434082, "logits/chosen": 0.6922903060913086, "logits/rejected": 0.524936318397522, "logps/chosen": -1.4099822044372559, "logps/rejected": -2.3606326580047607, "loss": 0.6848, "nll_loss": 0.6524597406387329, "rewards/accuracies": 1.0, "rewards/chosen": -0.1409982144832611, "rewards/margins": 0.09506504237651825, "rewards/rejected": -0.23606327176094055, "step": 3096 }, { "epoch": 8.479123887748118, "grad_norm": 4.977277755737305, "learning_rate": 5.757534246575342e-07, "log_odds_chosen": 2.0813422203063965, "log_odds_ratio": -0.1806405931711197, "logits/chosen": 0.9187220335006714, "logits/rejected": 0.8509390354156494, "logps/chosen": -2.3074004650115967, "logps/rejected": -4.235292911529541, "loss": 0.8711, "nll_loss": 0.8530417084693909, "rewards/accuracies": 1.0, "rewards/chosen": -0.23074005544185638, "rewards/margins": 0.1927892565727234, "rewards/rejected": -0.42352932691574097, "step": 3097 }, { "epoch": 8.48186173853525, "grad_norm": 4.4600443840026855, "learning_rate": 5.756164383561644e-07, "log_odds_chosen": 2.146941661834717, "log_odds_ratio": -0.4176299273967743, "logits/chosen": 0.9457533359527588, "logits/rejected": 0.9560420513153076, "logps/chosen": -2.4637649059295654, "logps/rejected": -4.522036552429199, "loss": 0.6765, "nll_loss": 0.6347562670707703, "rewards/accuracies": 0.75, "rewards/chosen": -0.24637646973133087, "rewards/margins": 0.20582719147205353, "rewards/rejected": -0.4522036612033844, "step": 3098 }, { "epoch": 8.484599589322382, "grad_norm": 3.375394582748413, "learning_rate": 5.754794520547945e-07, "log_odds_chosen": 4.065858840942383, "log_odds_ratio": -0.27497613430023193, "logits/chosen": 0.7533082962036133, "logits/rejected": 0.7699906229972839, "logps/chosen": -2.057027816772461, "logps/rejected": -6.049588680267334, "loss": 0.6706, "nll_loss": 0.643097460269928, "rewards/accuracies": 0.875, "rewards/chosen": -0.2057027816772461, "rewards/margins": 0.39925616979599, "rewards/rejected": -0.6049588918685913, "step": 3099 }, { "epoch": 8.487337440109513, "grad_norm": 4.771612644195557, "learning_rate": 5.753424657534246e-07, "log_odds_chosen": 1.153929591178894, "log_odds_ratio": -0.5654070973396301, "logits/chosen": 0.6893377900123596, "logits/rejected": 0.6519327163696289, "logps/chosen": -2.098881721496582, "logps/rejected": -3.090193748474121, "loss": 0.6685, "nll_loss": 0.6119674444198608, "rewards/accuracies": 0.875, "rewards/chosen": -0.20988819003105164, "rewards/margins": 0.09913120418787003, "rewards/rejected": -0.30901938676834106, "step": 3100 }, { "epoch": 8.490075290896646, "grad_norm": 3.0876002311706543, "learning_rate": 5.752054794520548e-07, "log_odds_chosen": 2.6712241172790527, "log_odds_ratio": -0.20772787928581238, "logits/chosen": 0.8674798607826233, "logits/rejected": 0.7741250395774841, "logps/chosen": -1.9114798307418823, "logps/rejected": -4.435837745666504, "loss": 0.588, "nll_loss": 0.5672453045845032, "rewards/accuracies": 1.0, "rewards/chosen": -0.19114799797534943, "rewards/margins": 0.2524357736110687, "rewards/rejected": -0.44358378648757935, "step": 3101 }, { "epoch": 8.492813141683778, "grad_norm": 3.4082834720611572, "learning_rate": 5.75068493150685e-07, "log_odds_chosen": 3.946648597717285, "log_odds_ratio": -0.0726773738861084, "logits/chosen": 0.8542929291725159, "logits/rejected": 0.8490235209465027, "logps/chosen": -1.6925463676452637, "logps/rejected": -5.343433380126953, "loss": 0.5866, "nll_loss": 0.5793036222457886, "rewards/accuracies": 1.0, "rewards/chosen": -0.1692546308040619, "rewards/margins": 0.36508870124816895, "rewards/rejected": -0.5343433022499084, "step": 3102 }, { "epoch": 8.49555099247091, "grad_norm": 4.522981643676758, "learning_rate": 5.74931506849315e-07, "log_odds_chosen": 2.944304943084717, "log_odds_ratio": -0.07547248154878616, "logits/chosen": 1.1266982555389404, "logits/rejected": 1.1106324195861816, "logps/chosen": -1.8331820964813232, "logps/rejected": -4.5391130447387695, "loss": 0.6302, "nll_loss": 0.6226611137390137, "rewards/accuracies": 1.0, "rewards/chosen": -0.18331821262836456, "rewards/margins": 0.27059313654899597, "rewards/rejected": -0.45391133427619934, "step": 3103 }, { "epoch": 8.498288843258042, "grad_norm": 3.1728363037109375, "learning_rate": 5.747945205479452e-07, "log_odds_chosen": 1.9418143033981323, "log_odds_ratio": -0.20931318402290344, "logits/chosen": 0.8344656825065613, "logits/rejected": 0.7904857993125916, "logps/chosen": -2.5599477291107178, "logps/rejected": -4.36305046081543, "loss": 0.6541, "nll_loss": 0.6331205368041992, "rewards/accuracies": 1.0, "rewards/chosen": -0.2559947669506073, "rewards/margins": 0.18031030893325806, "rewards/rejected": -0.43630510568618774, "step": 3104 }, { "epoch": 8.501026694045175, "grad_norm": 4.032560348510742, "learning_rate": 5.746575342465754e-07, "log_odds_chosen": 1.8786795139312744, "log_odds_ratio": -0.2258574217557907, "logits/chosen": 0.5706880688667297, "logits/rejected": 0.4659644365310669, "logps/chosen": -1.3193011283874512, "logps/rejected": -2.8915655612945557, "loss": 0.6167, "nll_loss": 0.5941460132598877, "rewards/accuracies": 1.0, "rewards/chosen": -0.1319301277399063, "rewards/margins": 0.15722644329071045, "rewards/rejected": -0.28915655612945557, "step": 3105 }, { "epoch": 8.503764544832306, "grad_norm": 3.0741405487060547, "learning_rate": 5.745205479452054e-07, "log_odds_chosen": 1.924774169921875, "log_odds_ratio": -0.2607620358467102, "logits/chosen": 0.6800687313079834, "logits/rejected": 0.586281418800354, "logps/chosen": -1.7097527980804443, "logps/rejected": -3.461289882659912, "loss": 0.6593, "nll_loss": 0.6332383155822754, "rewards/accuracies": 0.875, "rewards/chosen": -0.17097529768943787, "rewards/margins": 0.1751536726951599, "rewards/rejected": -0.34612900018692017, "step": 3106 }, { "epoch": 8.506502395619439, "grad_norm": 4.596220016479492, "learning_rate": 5.743835616438356e-07, "log_odds_chosen": 3.373356819152832, "log_odds_ratio": -0.16470111906528473, "logits/chosen": 1.0943610668182373, "logits/rejected": 1.1297128200531006, "logps/chosen": -2.5488104820251465, "logps/rejected": -5.721161842346191, "loss": 0.6651, "nll_loss": 0.6486597061157227, "rewards/accuracies": 0.875, "rewards/chosen": -0.2548810541629791, "rewards/margins": 0.31723517179489136, "rewards/rejected": -0.5721161365509033, "step": 3107 }, { "epoch": 8.50924024640657, "grad_norm": 3.4557530879974365, "learning_rate": 5.742465753424657e-07, "log_odds_chosen": 2.510472297668457, "log_odds_ratio": -0.2733405828475952, "logits/chosen": 0.7830659747123718, "logits/rejected": 0.8169079422950745, "logps/chosen": -2.1978678703308105, "logps/rejected": -4.618124485015869, "loss": 0.7303, "nll_loss": 0.7029891610145569, "rewards/accuracies": 0.875, "rewards/chosen": -0.21978677809238434, "rewards/margins": 0.242025688290596, "rewards/rejected": -0.46181243658065796, "step": 3108 }, { "epoch": 8.511978097193703, "grad_norm": 4.620948314666748, "learning_rate": 5.741095890410958e-07, "log_odds_chosen": 1.6507067680358887, "log_odds_ratio": -0.38203945755958557, "logits/chosen": 0.8583530187606812, "logits/rejected": 0.9128165245056152, "logps/chosen": -2.4487969875335693, "logps/rejected": -4.011288642883301, "loss": 0.5764, "nll_loss": 0.5381525754928589, "rewards/accuracies": 0.875, "rewards/chosen": -0.24487972259521484, "rewards/margins": 0.15624913573265076, "rewards/rejected": -0.4011288583278656, "step": 3109 }, { "epoch": 8.514715947980836, "grad_norm": 3.0387096405029297, "learning_rate": 5.73972602739726e-07, "log_odds_chosen": 4.936116695404053, "log_odds_ratio": -0.11899832636117935, "logits/chosen": 0.9667428731918335, "logits/rejected": 0.9912765026092529, "logps/chosen": -1.7960374355316162, "logps/rejected": -6.5440592765808105, "loss": 0.5565, "nll_loss": 0.5446341037750244, "rewards/accuracies": 1.0, "rewards/chosen": -0.1796037256717682, "rewards/margins": 0.4748022258281708, "rewards/rejected": -0.654405951499939, "step": 3110 }, { "epoch": 8.517453798767967, "grad_norm": 2.9799349308013916, "learning_rate": 5.738356164383561e-07, "log_odds_chosen": 4.3116583824157715, "log_odds_ratio": -0.0505620539188385, "logits/chosen": 1.0775495767593384, "logits/rejected": 1.064490795135498, "logps/chosen": -2.5152385234832764, "logps/rejected": -6.683723449707031, "loss": 0.6968, "nll_loss": 0.6917181015014648, "rewards/accuracies": 1.0, "rewards/chosen": -0.25152385234832764, "rewards/margins": 0.4168485105037689, "rewards/rejected": -0.668372392654419, "step": 3111 }, { "epoch": 8.5201916495551, "grad_norm": 2.929727554321289, "learning_rate": 5.736986301369863e-07, "log_odds_chosen": 3.9104602336883545, "log_odds_ratio": -0.1597714126110077, "logits/chosen": 0.6373205184936523, "logits/rejected": 0.5870816111564636, "logps/chosen": -1.590601921081543, "logps/rejected": -5.204301357269287, "loss": 0.6815, "nll_loss": 0.665535569190979, "rewards/accuracies": 0.875, "rewards/chosen": -0.15906019508838654, "rewards/margins": 0.36136993765830994, "rewards/rejected": -0.5204301476478577, "step": 3112 }, { "epoch": 8.522929500342231, "grad_norm": 5.5442214012146, "learning_rate": 5.735616438356164e-07, "log_odds_chosen": 2.1551008224487305, "log_odds_ratio": -0.3832126259803772, "logits/chosen": 0.9847644567489624, "logits/rejected": 0.9936846494674683, "logps/chosen": -1.9542769193649292, "logps/rejected": -3.941084384918213, "loss": 0.6565, "nll_loss": 0.618146538734436, "rewards/accuracies": 0.875, "rewards/chosen": -0.19542768597602844, "rewards/margins": 0.19868077337741852, "rewards/rejected": -0.39410847425460815, "step": 3113 }, { "epoch": 8.525667351129364, "grad_norm": 2.9246578216552734, "learning_rate": 5.734246575342465e-07, "log_odds_chosen": 3.2360880374908447, "log_odds_ratio": -0.18071697652339935, "logits/chosen": 0.7939540147781372, "logits/rejected": 0.7846159338951111, "logps/chosen": -2.1763954162597656, "logps/rejected": -5.239911079406738, "loss": 0.6725, "nll_loss": 0.6543944478034973, "rewards/accuracies": 1.0, "rewards/chosen": -0.21763953566551208, "rewards/margins": 0.30635154247283936, "rewards/rejected": -0.5239911079406738, "step": 3114 }, { "epoch": 8.528405201916495, "grad_norm": 3.176029920578003, "learning_rate": 5.732876712328767e-07, "log_odds_chosen": 4.367135047912598, "log_odds_ratio": -0.13112318515777588, "logits/chosen": 0.8354706764221191, "logits/rejected": 0.8526755571365356, "logps/chosen": -2.1596882343292236, "logps/rejected": -6.3641037940979, "loss": 0.7171, "nll_loss": 0.7040053606033325, "rewards/accuracies": 1.0, "rewards/chosen": -0.21596884727478027, "rewards/margins": 0.42044150829315186, "rewards/rejected": -0.6364103555679321, "step": 3115 }, { "epoch": 8.531143052703628, "grad_norm": 3.3029613494873047, "learning_rate": 5.731506849315068e-07, "log_odds_chosen": 3.8038480281829834, "log_odds_ratio": -0.08960864692926407, "logits/chosen": 0.9474663734436035, "logits/rejected": 0.9416664838790894, "logps/chosen": -1.7055230140686035, "logps/rejected": -5.314827919006348, "loss": 0.5901, "nll_loss": 0.581143856048584, "rewards/accuracies": 1.0, "rewards/chosen": -0.1705523133277893, "rewards/margins": 0.36093050241470337, "rewards/rejected": -0.5314828157424927, "step": 3116 }, { "epoch": 8.53388090349076, "grad_norm": 3.978071689605713, "learning_rate": 5.730136986301369e-07, "log_odds_chosen": 1.8417952060699463, "log_odds_ratio": -0.3354313373565674, "logits/chosen": 1.106706142425537, "logits/rejected": 1.1079376935958862, "logps/chosen": -2.187746047973633, "logps/rejected": -3.9755115509033203, "loss": 0.698, "nll_loss": 0.6644554734230042, "rewards/accuracies": 0.875, "rewards/chosen": -0.21877461671829224, "rewards/margins": 0.17877653241157532, "rewards/rejected": -0.39755114912986755, "step": 3117 }, { "epoch": 8.536618754277892, "grad_norm": 4.100435733795166, "learning_rate": 5.728767123287671e-07, "log_odds_chosen": 1.3653825521469116, "log_odds_ratio": -0.32507145404815674, "logits/chosen": 0.9297592043876648, "logits/rejected": 0.8026930689811707, "logps/chosen": -1.459699034690857, "logps/rejected": -2.6718716621398926, "loss": 0.6175, "nll_loss": 0.5849994421005249, "rewards/accuracies": 1.0, "rewards/chosen": -0.1459699124097824, "rewards/margins": 0.1212172582745552, "rewards/rejected": -0.2671871781349182, "step": 3118 }, { "epoch": 8.539356605065024, "grad_norm": 3.4840519428253174, "learning_rate": 5.727397260273973e-07, "log_odds_chosen": 2.3281147480010986, "log_odds_ratio": -0.4629563093185425, "logits/chosen": 0.7967567443847656, "logits/rejected": 0.8073267936706543, "logps/chosen": -1.9478859901428223, "logps/rejected": -4.174509048461914, "loss": 0.6635, "nll_loss": 0.6171767711639404, "rewards/accuracies": 0.75, "rewards/chosen": -0.1947885900735855, "rewards/margins": 0.2226623296737671, "rewards/rejected": -0.4174509048461914, "step": 3119 }, { "epoch": 8.542094455852157, "grad_norm": 4.69523286819458, "learning_rate": 5.726027397260273e-07, "log_odds_chosen": 3.631519079208374, "log_odds_ratio": -0.22640758752822876, "logits/chosen": 0.8351281881332397, "logits/rejected": 0.8600236177444458, "logps/chosen": -1.9877995252609253, "logps/rejected": -5.439120292663574, "loss": 0.7265, "nll_loss": 0.7038702964782715, "rewards/accuracies": 0.875, "rewards/chosen": -0.19877995550632477, "rewards/margins": 0.34513211250305176, "rewards/rejected": -0.5439120531082153, "step": 3120 }, { "epoch": 8.544832306639288, "grad_norm": 3.757525682449341, "learning_rate": 5.724657534246575e-07, "log_odds_chosen": 4.232691764831543, "log_odds_ratio": -0.3415564298629761, "logits/chosen": 0.9296379089355469, "logits/rejected": 0.9879932999610901, "logps/chosen": -2.5163040161132812, "logps/rejected": -6.6805949211120605, "loss": 0.8354, "nll_loss": 0.8011959195137024, "rewards/accuracies": 0.75, "rewards/chosen": -0.25163042545318604, "rewards/margins": 0.4164290726184845, "rewards/rejected": -0.6680594682693481, "step": 3121 }, { "epoch": 8.54757015742642, "grad_norm": 2.847240686416626, "learning_rate": 5.723287671232876e-07, "log_odds_chosen": 2.1895711421966553, "log_odds_ratio": -0.1978505551815033, "logits/chosen": 0.7076476812362671, "logits/rejected": 0.628613293170929, "logps/chosen": -1.2470543384552002, "logps/rejected": -3.1338119506835938, "loss": 0.6122, "nll_loss": 0.5923824906349182, "rewards/accuracies": 1.0, "rewards/chosen": -0.12470544874668121, "rewards/margins": 0.18867576122283936, "rewards/rejected": -0.3133811950683594, "step": 3122 }, { "epoch": 8.550308008213552, "grad_norm": 3.4006757736206055, "learning_rate": 5.721917808219177e-07, "log_odds_chosen": 1.8801509141921997, "log_odds_ratio": -0.2852354049682617, "logits/chosen": 0.9413941502571106, "logits/rejected": 0.9330251812934875, "logps/chosen": -1.9099873304367065, "logps/rejected": -3.6811904907226562, "loss": 0.6474, "nll_loss": 0.618903398513794, "rewards/accuracies": 0.875, "rewards/chosen": -0.19099873304367065, "rewards/margins": 0.17712034285068512, "rewards/rejected": -0.3681190609931946, "step": 3123 }, { "epoch": 8.553045859000685, "grad_norm": 2.8614869117736816, "learning_rate": 5.720547945205479e-07, "log_odds_chosen": 3.380788803100586, "log_odds_ratio": -0.19744017720222473, "logits/chosen": 0.7534053921699524, "logits/rejected": 0.7561802864074707, "logps/chosen": -1.9091682434082031, "logps/rejected": -5.143769264221191, "loss": 0.6687, "nll_loss": 0.6489822864532471, "rewards/accuracies": 1.0, "rewards/chosen": -0.19091682136058807, "rewards/margins": 0.3234601318836212, "rewards/rejected": -0.5143769383430481, "step": 3124 }, { "epoch": 8.555783709787816, "grad_norm": 3.1545798778533936, "learning_rate": 5.71917808219178e-07, "log_odds_chosen": 1.7475603818893433, "log_odds_ratio": -0.29518821835517883, "logits/chosen": 0.9010549783706665, "logits/rejected": 0.8647221326828003, "logps/chosen": -1.9100593328475952, "logps/rejected": -3.547189235687256, "loss": 0.6256, "nll_loss": 0.5960475206375122, "rewards/accuracies": 0.875, "rewards/chosen": -0.19100594520568848, "rewards/margins": 0.1637129932641983, "rewards/rejected": -0.3547189235687256, "step": 3125 }, { "epoch": 8.558521560574949, "grad_norm": 3.4697508811950684, "learning_rate": 5.717808219178082e-07, "log_odds_chosen": 1.2056429386138916, "log_odds_ratio": -0.4007265567779541, "logits/chosen": 0.6559417843818665, "logits/rejected": 0.5774258971214294, "logps/chosen": -2.185469150543213, "logps/rejected": -3.3087387084960938, "loss": 0.6502, "nll_loss": 0.610129177570343, "rewards/accuracies": 1.0, "rewards/chosen": -0.21854694187641144, "rewards/margins": 0.11232694983482361, "rewards/rejected": -0.33087384700775146, "step": 3126 }, { "epoch": 8.56125941136208, "grad_norm": 3.371485710144043, "learning_rate": 5.716438356164383e-07, "log_odds_chosen": 2.1800248622894287, "log_odds_ratio": -0.26270222663879395, "logits/chosen": 0.9124853014945984, "logits/rejected": 0.8357639312744141, "logps/chosen": -2.0795164108276367, "logps/rejected": -4.108943939208984, "loss": 0.6694, "nll_loss": 0.6431742906570435, "rewards/accuracies": 1.0, "rewards/chosen": -0.2079516351222992, "rewards/margins": 0.20294280350208282, "rewards/rejected": -0.4108944237232208, "step": 3127 }, { "epoch": 8.563997262149213, "grad_norm": 3.2651054859161377, "learning_rate": 5.715068493150684e-07, "log_odds_chosen": 1.745647668838501, "log_odds_ratio": -0.26958227157592773, "logits/chosen": 0.6915274262428284, "logits/rejected": 0.6416803002357483, "logps/chosen": -1.827257752418518, "logps/rejected": -3.443542003631592, "loss": 0.5966, "nll_loss": 0.5696812868118286, "rewards/accuracies": 0.875, "rewards/chosen": -0.18272577226161957, "rewards/margins": 0.16162844002246857, "rewards/rejected": -0.34435421228408813, "step": 3128 }, { "epoch": 8.566735112936344, "grad_norm": 3.1320888996124268, "learning_rate": 5.713698630136986e-07, "log_odds_chosen": 3.191601514816284, "log_odds_ratio": -0.2055833637714386, "logits/chosen": 0.8494200110435486, "logits/rejected": 0.8555428981781006, "logps/chosen": -2.1340785026550293, "logps/rejected": -5.176047325134277, "loss": 0.5631, "nll_loss": 0.5424937605857849, "rewards/accuracies": 0.875, "rewards/chosen": -0.21340785920619965, "rewards/margins": 0.30419692397117615, "rewards/rejected": -0.5176048278808594, "step": 3129 }, { "epoch": 8.569472963723477, "grad_norm": 3.477029800415039, "learning_rate": 5.712328767123287e-07, "log_odds_chosen": 2.117093086242676, "log_odds_ratio": -0.1735428124666214, "logits/chosen": 1.2417175769805908, "logits/rejected": 1.2779948711395264, "logps/chosen": -2.1188173294067383, "logps/rejected": -4.0465545654296875, "loss": 0.5905, "nll_loss": 0.5731909275054932, "rewards/accuracies": 1.0, "rewards/chosen": -0.21188172698020935, "rewards/margins": 0.1927737295627594, "rewards/rejected": -0.40465545654296875, "step": 3130 }, { "epoch": 8.572210814510608, "grad_norm": 3.2370991706848145, "learning_rate": 5.710958904109588e-07, "log_odds_chosen": 1.8466782569885254, "log_odds_ratio": -0.31858760118484497, "logits/chosen": 1.1288347244262695, "logits/rejected": 1.1634200811386108, "logps/chosen": -1.9573028087615967, "logps/rejected": -3.700718402862549, "loss": 0.6258, "nll_loss": 0.5939759016036987, "rewards/accuracies": 0.875, "rewards/chosen": -0.1957302838563919, "rewards/margins": 0.17434155941009521, "rewards/rejected": -0.3700718283653259, "step": 3131 }, { "epoch": 8.574948665297741, "grad_norm": 3.375079870223999, "learning_rate": 5.70958904109589e-07, "log_odds_chosen": 1.7854230403900146, "log_odds_ratio": -0.2268626093864441, "logits/chosen": 0.9324727058410645, "logits/rejected": 0.8851602077484131, "logps/chosen": -1.433237075805664, "logps/rejected": -2.97935152053833, "loss": 0.5962, "nll_loss": 0.5734804272651672, "rewards/accuracies": 1.0, "rewards/chosen": -0.14332371950149536, "rewards/margins": 0.15461145341396332, "rewards/rejected": -0.2979351282119751, "step": 3132 }, { "epoch": 8.577686516084874, "grad_norm": 3.531191349029541, "learning_rate": 5.708219178082192e-07, "log_odds_chosen": 2.4206461906433105, "log_odds_ratio": -0.18986709415912628, "logits/chosen": 0.7407463788986206, "logits/rejected": 0.6953410506248474, "logps/chosen": -2.023952007293701, "logps/rejected": -4.233817100524902, "loss": 0.6242, "nll_loss": 0.6051894426345825, "rewards/accuracies": 1.0, "rewards/chosen": -0.20239520072937012, "rewards/margins": 0.2209865301847458, "rewards/rejected": -0.4233817160129547, "step": 3133 }, { "epoch": 8.580424366872005, "grad_norm": 3.1370480060577393, "learning_rate": 5.706849315068492e-07, "log_odds_chosen": 3.27852725982666, "log_odds_ratio": -0.1836796998977661, "logits/chosen": 0.9137442708015442, "logits/rejected": 0.8707003593444824, "logps/chosen": -1.7458469867706299, "logps/rejected": -4.855716228485107, "loss": 0.5666, "nll_loss": 0.5482065081596375, "rewards/accuracies": 0.875, "rewards/chosen": -0.17458470165729523, "rewards/margins": 0.3109869360923767, "rewards/rejected": -0.48557162284851074, "step": 3134 }, { "epoch": 8.583162217659137, "grad_norm": 4.306238651275635, "learning_rate": 5.705479452054794e-07, "log_odds_chosen": 3.338080406188965, "log_odds_ratio": -0.17607305943965912, "logits/chosen": 0.8913712501525879, "logits/rejected": 0.9401165843009949, "logps/chosen": -2.254763126373291, "logps/rejected": -5.44819450378418, "loss": 0.705, "nll_loss": 0.6873716711997986, "rewards/accuracies": 1.0, "rewards/chosen": -0.22547632455825806, "rewards/margins": 0.3193431794643402, "rewards/rejected": -0.5448194742202759, "step": 3135 }, { "epoch": 8.58590006844627, "grad_norm": 4.117812156677246, "learning_rate": 5.704109589041096e-07, "log_odds_chosen": 1.3907579183578491, "log_odds_ratio": -0.3179273307323456, "logits/chosen": 0.953313946723938, "logits/rejected": 0.9256587028503418, "logps/chosen": -2.560431957244873, "logps/rejected": -3.8444790840148926, "loss": 0.6469, "nll_loss": 0.6150809526443481, "rewards/accuracies": 1.0, "rewards/chosen": -0.2560431957244873, "rewards/margins": 0.12840472161769867, "rewards/rejected": -0.38444793224334717, "step": 3136 }, { "epoch": 8.588637919233403, "grad_norm": 2.8746798038482666, "learning_rate": 5.702739726027396e-07, "log_odds_chosen": 3.7496938705444336, "log_odds_ratio": -0.12469565123319626, "logits/chosen": 1.1126534938812256, "logits/rejected": 1.1153340339660645, "logps/chosen": -2.193650960922241, "logps/rejected": -5.781683921813965, "loss": 0.669, "nll_loss": 0.6565436124801636, "rewards/accuracies": 1.0, "rewards/chosen": -0.21936509013175964, "rewards/margins": 0.35880324244499207, "rewards/rejected": -0.5781683921813965, "step": 3137 }, { "epoch": 8.591375770020534, "grad_norm": 3.0271525382995605, "learning_rate": 5.701369863013698e-07, "log_odds_chosen": 3.1770195960998535, "log_odds_ratio": -0.1313593089580536, "logits/chosen": 0.9995391368865967, "logits/rejected": 1.0347387790679932, "logps/chosen": -1.8249166011810303, "logps/rejected": -4.8335089683532715, "loss": 0.6452, "nll_loss": 0.6321014761924744, "rewards/accuracies": 1.0, "rewards/chosen": -0.18249167501926422, "rewards/margins": 0.300859272480011, "rewards/rejected": -0.483350932598114, "step": 3138 }, { "epoch": 8.594113620807667, "grad_norm": 4.048715114593506, "learning_rate": 5.699999999999999e-07, "log_odds_chosen": 1.733362078666687, "log_odds_ratio": -0.4200633764266968, "logits/chosen": 1.1287838220596313, "logits/rejected": 1.1277661323547363, "logps/chosen": -2.140458106994629, "logps/rejected": -3.7000598907470703, "loss": 0.653, "nll_loss": 0.611009955406189, "rewards/accuracies": 0.75, "rewards/chosen": -0.21404583752155304, "rewards/margins": 0.15596017241477966, "rewards/rejected": -0.3700060248374939, "step": 3139 }, { "epoch": 8.596851471594798, "grad_norm": 3.367094039916992, "learning_rate": 5.698630136986301e-07, "log_odds_chosen": 3.8246541023254395, "log_odds_ratio": -0.22041234374046326, "logits/chosen": 1.0306261777877808, "logits/rejected": 1.0711275339126587, "logps/chosen": -1.8542407751083374, "logps/rejected": -5.572366714477539, "loss": 0.562, "nll_loss": 0.539969265460968, "rewards/accuracies": 0.875, "rewards/chosen": -0.1854240894317627, "rewards/margins": 0.3718125820159912, "rewards/rejected": -0.5572366118431091, "step": 3140 }, { "epoch": 8.59958932238193, "grad_norm": 3.310736894607544, "learning_rate": 5.697260273972602e-07, "log_odds_chosen": 3.080240249633789, "log_odds_ratio": -0.31055063009262085, "logits/chosen": 0.6769194602966309, "logits/rejected": 0.6604695320129395, "logps/chosen": -1.812890887260437, "logps/rejected": -4.711891174316406, "loss": 0.6232, "nll_loss": 0.5921873450279236, "rewards/accuracies": 0.875, "rewards/chosen": -0.18128907680511475, "rewards/margins": 0.289900004863739, "rewards/rejected": -0.47118911147117615, "step": 3141 }, { "epoch": 8.602327173169062, "grad_norm": 3.150283098220825, "learning_rate": 5.695890410958903e-07, "log_odds_chosen": 3.1662418842315674, "log_odds_ratio": -0.12913326919078827, "logits/chosen": 0.8361988663673401, "logits/rejected": 0.8172776699066162, "logps/chosen": -1.5410575866699219, "logps/rejected": -4.482979774475098, "loss": 0.6408, "nll_loss": 0.6279212832450867, "rewards/accuracies": 1.0, "rewards/chosen": -0.1541057676076889, "rewards/margins": 0.29419225454330444, "rewards/rejected": -0.44829800724983215, "step": 3142 }, { "epoch": 8.605065023956195, "grad_norm": 5.119185924530029, "learning_rate": 5.694520547945206e-07, "log_odds_chosen": 1.639225721359253, "log_odds_ratio": -0.6590372920036316, "logits/chosen": 0.63213711977005, "logits/rejected": 0.6058750748634338, "logps/chosen": -2.2564585208892822, "logps/rejected": -3.6986782550811768, "loss": 0.8524, "nll_loss": 0.7864930033683777, "rewards/accuracies": 0.75, "rewards/chosen": -0.22564585506916046, "rewards/margins": 0.1442219614982605, "rewards/rejected": -0.36986783146858215, "step": 3143 }, { "epoch": 8.607802874743326, "grad_norm": 3.003492593765259, "learning_rate": 5.693150684931506e-07, "log_odds_chosen": 4.429576396942139, "log_odds_ratio": -0.2906900942325592, "logits/chosen": 0.9756472706794739, "logits/rejected": 0.9826866984367371, "logps/chosen": -2.1631059646606445, "logps/rejected": -6.462007999420166, "loss": 0.6409, "nll_loss": 0.6117827892303467, "rewards/accuracies": 0.875, "rewards/chosen": -0.21631059050559998, "rewards/margins": 0.4298902451992035, "rewards/rejected": -0.6462008357048035, "step": 3144 }, { "epoch": 8.61054072553046, "grad_norm": 3.763184070587158, "learning_rate": 5.691780821917807e-07, "log_odds_chosen": 2.3409502506256104, "log_odds_ratio": -0.3313382863998413, "logits/chosen": 0.9625017046928406, "logits/rejected": 1.0339688062667847, "logps/chosen": -2.95039439201355, "logps/rejected": -5.209333419799805, "loss": 0.6778, "nll_loss": 0.644621729850769, "rewards/accuracies": 0.875, "rewards/chosen": -0.29503944516181946, "rewards/margins": 0.22589391469955444, "rewards/rejected": -0.5209333300590515, "step": 3145 }, { "epoch": 8.61327857631759, "grad_norm": 5.437530517578125, "learning_rate": 5.69041095890411e-07, "log_odds_chosen": 2.047560691833496, "log_odds_ratio": -0.6309150457382202, "logits/chosen": 0.9914434552192688, "logits/rejected": 0.9422076344490051, "logps/chosen": -1.7988996505737305, "logps/rejected": -3.703463554382324, "loss": 0.6705, "nll_loss": 0.6074298620223999, "rewards/accuracies": 0.5, "rewards/chosen": -0.179889976978302, "rewards/margins": 0.19045640528202057, "rewards/rejected": -0.3703463673591614, "step": 3146 }, { "epoch": 8.616016427104723, "grad_norm": 3.104398012161255, "learning_rate": 5.689041095890412e-07, "log_odds_chosen": 3.734246253967285, "log_odds_ratio": -0.1108553484082222, "logits/chosen": 0.9888091683387756, "logits/rejected": 1.0239500999450684, "logps/chosen": -1.9607398509979248, "logps/rejected": -5.4317779541015625, "loss": 0.5971, "nll_loss": 0.5860349535942078, "rewards/accuracies": 1.0, "rewards/chosen": -0.1960739940404892, "rewards/margins": 0.3471038043498993, "rewards/rejected": -0.5431778430938721, "step": 3147 }, { "epoch": 8.618754277891854, "grad_norm": 3.095475673675537, "learning_rate": 5.687671232876712e-07, "log_odds_chosen": 3.5486133098602295, "log_odds_ratio": -0.0991634875535965, "logits/chosen": 1.190484642982483, "logits/rejected": 1.185700535774231, "logps/chosen": -2.1385107040405273, "logps/rejected": -5.54401969909668, "loss": 0.5877, "nll_loss": 0.5778238773345947, "rewards/accuracies": 1.0, "rewards/chosen": -0.21385107934474945, "rewards/margins": 0.34055083990097046, "rewards/rejected": -0.5544019341468811, "step": 3148 }, { "epoch": 8.621492128678987, "grad_norm": 3.062196731567383, "learning_rate": 5.686301369863014e-07, "log_odds_chosen": 1.3697528839111328, "log_odds_ratio": -0.29712945222854614, "logits/chosen": 0.9482317566871643, "logits/rejected": 0.9140681028366089, "logps/chosen": -1.7921366691589355, "logps/rejected": -3.012528896331787, "loss": 0.5507, "nll_loss": 0.5210067629814148, "rewards/accuracies": 0.875, "rewards/chosen": -0.17921367287635803, "rewards/margins": 0.12203922122716904, "rewards/rejected": -0.30125290155410767, "step": 3149 }, { "epoch": 8.624229979466119, "grad_norm": 2.9144651889801025, "learning_rate": 5.684931506849316e-07, "log_odds_chosen": 3.0641701221466064, "log_odds_ratio": -0.17389827966690063, "logits/chosen": 0.8957750797271729, "logits/rejected": 0.883712887763977, "logps/chosen": -1.4857598543167114, "logps/rejected": -4.251360893249512, "loss": 0.6001, "nll_loss": 0.5827383399009705, "rewards/accuracies": 1.0, "rewards/chosen": -0.14857599139213562, "rewards/margins": 0.27656012773513794, "rewards/rejected": -0.42513608932495117, "step": 3150 }, { "epoch": 8.626967830253252, "grad_norm": 3.4120686054229736, "learning_rate": 5.683561643835616e-07, "log_odds_chosen": 2.2249186038970947, "log_odds_ratio": -0.25875139236450195, "logits/chosen": 0.8458842039108276, "logits/rejected": 0.8183038830757141, "logps/chosen": -2.412662982940674, "logps/rejected": -4.407339572906494, "loss": 0.5724, "nll_loss": 0.5465021133422852, "rewards/accuracies": 0.875, "rewards/chosen": -0.24126628041267395, "rewards/margins": 0.19946768879890442, "rewards/rejected": -0.44073396921157837, "step": 3151 }, { "epoch": 8.629705681040383, "grad_norm": 6.081529140472412, "learning_rate": 5.682191780821918e-07, "log_odds_chosen": 1.9909731149673462, "log_odds_ratio": -0.7206553220748901, "logits/chosen": 1.209578514099121, "logits/rejected": 1.1581288576126099, "logps/chosen": -2.4346890449523926, "logps/rejected": -4.222764015197754, "loss": 0.7041, "nll_loss": 0.6319862008094788, "rewards/accuracies": 0.875, "rewards/chosen": -0.24346894025802612, "rewards/margins": 0.17880749702453613, "rewards/rejected": -0.42227643728256226, "step": 3152 }, { "epoch": 8.632443531827516, "grad_norm": 3.3814353942871094, "learning_rate": 5.680821917808219e-07, "log_odds_chosen": 3.3092198371887207, "log_odds_ratio": -0.16486071050167084, "logits/chosen": 0.9821648597717285, "logits/rejected": 0.9937901496887207, "logps/chosen": -1.8222019672393799, "logps/rejected": -4.987473487854004, "loss": 0.6411, "nll_loss": 0.6246483325958252, "rewards/accuracies": 1.0, "rewards/chosen": -0.1822201907634735, "rewards/margins": 0.3165271282196045, "rewards/rejected": -0.4987473487854004, "step": 3153 }, { "epoch": 8.635181382614647, "grad_norm": 3.7649126052856445, "learning_rate": 5.679452054794521e-07, "log_odds_chosen": 2.2846860885620117, "log_odds_ratio": -0.26124873757362366, "logits/chosen": 0.8157573342323303, "logits/rejected": 0.7781153917312622, "logps/chosen": -1.2958910465240479, "logps/rejected": -3.3448667526245117, "loss": 0.7086, "nll_loss": 0.6824576258659363, "rewards/accuracies": 1.0, "rewards/chosen": -0.12958911061286926, "rewards/margins": 0.20489756762981415, "rewards/rejected": -0.3344866633415222, "step": 3154 }, { "epoch": 8.63791923340178, "grad_norm": 3.5693511962890625, "learning_rate": 5.678082191780822e-07, "log_odds_chosen": 1.8888720273971558, "log_odds_ratio": -0.3731426000595093, "logits/chosen": 0.7681206464767456, "logits/rejected": 0.7442538738250732, "logps/chosen": -2.644791841506958, "logps/rejected": -4.322592258453369, "loss": 0.6728, "nll_loss": 0.6354459524154663, "rewards/accuracies": 0.75, "rewards/chosen": -0.2644791901111603, "rewards/margins": 0.1677800714969635, "rewards/rejected": -0.43225929141044617, "step": 3155 }, { "epoch": 8.640657084188911, "grad_norm": 3.2810020446777344, "learning_rate": 5.676712328767123e-07, "log_odds_chosen": 2.9072577953338623, "log_odds_ratio": -0.33930933475494385, "logits/chosen": 0.9638288021087646, "logits/rejected": 1.0374534130096436, "logps/chosen": -1.6812548637390137, "logps/rejected": -4.284679889678955, "loss": 0.7227, "nll_loss": 0.6887727975845337, "rewards/accuracies": 0.875, "rewards/chosen": -0.1681254804134369, "rewards/margins": 0.260342538356781, "rewards/rejected": -0.4284680485725403, "step": 3156 }, { "epoch": 8.643394934976044, "grad_norm": 3.6699047088623047, "learning_rate": 5.675342465753425e-07, "log_odds_chosen": 1.127993106842041, "log_odds_ratio": -0.3766609728336334, "logits/chosen": 0.8939231634140015, "logits/rejected": 0.9490048885345459, "logps/chosen": -2.547760009765625, "logps/rejected": -3.6262500286102295, "loss": 0.6975, "nll_loss": 0.659858763217926, "rewards/accuracies": 0.75, "rewards/chosen": -0.2547760009765625, "rewards/margins": 0.10784903168678284, "rewards/rejected": -0.36262500286102295, "step": 3157 }, { "epoch": 8.646132785763175, "grad_norm": 3.1011974811553955, "learning_rate": 5.673972602739726e-07, "log_odds_chosen": 1.7523316144943237, "log_odds_ratio": -0.31695669889450073, "logits/chosen": 0.7786275148391724, "logits/rejected": 0.6859713792800903, "logps/chosen": -1.4884445667266846, "logps/rejected": -3.0966145992279053, "loss": 0.5969, "nll_loss": 0.5652215480804443, "rewards/accuracies": 1.0, "rewards/chosen": -0.14884445071220398, "rewards/margins": 0.1608169972896576, "rewards/rejected": -0.30966147780418396, "step": 3158 }, { "epoch": 8.648870636550308, "grad_norm": 3.6452689170837402, "learning_rate": 5.672602739726027e-07, "log_odds_chosen": 3.5722410678863525, "log_odds_ratio": -0.08133915066719055, "logits/chosen": 1.225990653038025, "logits/rejected": 1.2607684135437012, "logps/chosen": -2.4339776039123535, "logps/rejected": -5.882427215576172, "loss": 0.7328, "nll_loss": 0.724647045135498, "rewards/accuracies": 1.0, "rewards/chosen": -0.2433977723121643, "rewards/margins": 0.3448449969291687, "rewards/rejected": -0.588242769241333, "step": 3159 }, { "epoch": 8.651608487337441, "grad_norm": 8.307868003845215, "learning_rate": 5.671232876712329e-07, "log_odds_chosen": 1.1298623085021973, "log_odds_ratio": -0.5884230136871338, "logits/chosen": 0.815625786781311, "logits/rejected": 0.7709327936172485, "logps/chosen": -2.4950008392333984, "logps/rejected": -3.565178394317627, "loss": 0.6622, "nll_loss": 0.6034067273139954, "rewards/accuracies": 0.75, "rewards/chosen": -0.2495000660419464, "rewards/margins": 0.10701777040958405, "rewards/rejected": -0.35651785135269165, "step": 3160 }, { "epoch": 8.654346338124572, "grad_norm": 2.6494319438934326, "learning_rate": 5.669863013698631e-07, "log_odds_chosen": 3.192411184310913, "log_odds_ratio": -0.1509665846824646, "logits/chosen": 0.92440265417099, "logits/rejected": 0.9169396162033081, "logps/chosen": -3.035341262817383, "logps/rejected": -6.059531211853027, "loss": 0.6509, "nll_loss": 0.6358510255813599, "rewards/accuracies": 0.875, "rewards/chosen": -0.3035341203212738, "rewards/margins": 0.3024190366268158, "rewards/rejected": -0.6059531569480896, "step": 3161 }, { "epoch": 8.657084188911703, "grad_norm": 3.235095977783203, "learning_rate": 5.668493150684931e-07, "log_odds_chosen": 3.8902907371520996, "log_odds_ratio": -0.10004657506942749, "logits/chosen": 1.0031582117080688, "logits/rejected": 1.0565263032913208, "logps/chosen": -2.357706069946289, "logps/rejected": -6.138888359069824, "loss": 0.6395, "nll_loss": 0.6295369267463684, "rewards/accuracies": 1.0, "rewards/chosen": -0.23577064275741577, "rewards/margins": 0.37811824679374695, "rewards/rejected": -0.6138888597488403, "step": 3162 }, { "epoch": 8.659822039698836, "grad_norm": 3.3328659534454346, "learning_rate": 5.667123287671233e-07, "log_odds_chosen": 2.259380340576172, "log_odds_ratio": -0.23709706962108612, "logits/chosen": 1.0413111448287964, "logits/rejected": 1.035746693611145, "logps/chosen": -1.3094661235809326, "logps/rejected": -3.326018810272217, "loss": 0.6003, "nll_loss": 0.5765791535377502, "rewards/accuracies": 1.0, "rewards/chosen": -0.13094662129878998, "rewards/margins": 0.2016552984714508, "rewards/rejected": -0.3326019048690796, "step": 3163 }, { "epoch": 8.66255989048597, "grad_norm": 6.185794353485107, "learning_rate": 5.665753424657535e-07, "log_odds_chosen": 2.858954906463623, "log_odds_ratio": -0.3291948437690735, "logits/chosen": 0.9456779956817627, "logits/rejected": 0.8552400469779968, "logps/chosen": -1.4225763082504272, "logps/rejected": -4.052544593811035, "loss": 0.6319, "nll_loss": 0.5989515781402588, "rewards/accuracies": 0.875, "rewards/chosen": -0.14225763082504272, "rewards/margins": 0.2629968225955963, "rewards/rejected": -0.4052544832229614, "step": 3164 }, { "epoch": 8.6652977412731, "grad_norm": 3.082948684692383, "learning_rate": 5.664383561643835e-07, "log_odds_chosen": 3.4278149604797363, "log_odds_ratio": -0.2466059774160385, "logits/chosen": 0.6246459484100342, "logits/rejected": 0.5874334573745728, "logps/chosen": -2.16203236579895, "logps/rejected": -5.468081474304199, "loss": 0.6958, "nll_loss": 0.6711277961730957, "rewards/accuracies": 0.875, "rewards/chosen": -0.2162032425403595, "rewards/margins": 0.3306048810482025, "rewards/rejected": -0.546808123588562, "step": 3165 }, { "epoch": 8.668035592060233, "grad_norm": 2.8824996948242188, "learning_rate": 5.663013698630137e-07, "log_odds_chosen": 2.2191321849823, "log_odds_ratio": -0.22320136427879333, "logits/chosen": 0.7971136569976807, "logits/rejected": 0.7716754078865051, "logps/chosen": -2.082139492034912, "logps/rejected": -4.130335330963135, "loss": 0.6624, "nll_loss": 0.6401224136352539, "rewards/accuracies": 1.0, "rewards/chosen": -0.2082139551639557, "rewards/margins": 0.20481958985328674, "rewards/rejected": -0.41303354501724243, "step": 3166 }, { "epoch": 8.670773442847365, "grad_norm": 3.5329840183258057, "learning_rate": 5.661643835616438e-07, "log_odds_chosen": 2.787184238433838, "log_odds_ratio": -0.20018120110034943, "logits/chosen": 1.1137031316757202, "logits/rejected": 1.1034905910491943, "logps/chosen": -1.8317389488220215, "logps/rejected": -4.463916778564453, "loss": 0.5997, "nll_loss": 0.5797258019447327, "rewards/accuracies": 1.0, "rewards/chosen": -0.18317390978336334, "rewards/margins": 0.2632177472114563, "rewards/rejected": -0.44639164209365845, "step": 3167 }, { "epoch": 8.673511293634498, "grad_norm": 3.0755910873413086, "learning_rate": 5.66027397260274e-07, "log_odds_chosen": 3.589369297027588, "log_odds_ratio": -0.1732720136642456, "logits/chosen": 0.7424793243408203, "logits/rejected": 0.7052728533744812, "logps/chosen": -1.6536533832550049, "logps/rejected": -5.05316686630249, "loss": 0.7161, "nll_loss": 0.698785662651062, "rewards/accuracies": 1.0, "rewards/chosen": -0.1653653383255005, "rewards/margins": 0.3399513363838196, "rewards/rejected": -0.5053166151046753, "step": 3168 }, { "epoch": 8.676249144421629, "grad_norm": 3.1997017860412598, "learning_rate": 5.658904109589041e-07, "log_odds_chosen": 2.7781803607940674, "log_odds_ratio": -0.15823566913604736, "logits/chosen": 1.1086145639419556, "logits/rejected": 1.1033694744110107, "logps/chosen": -1.925965428352356, "logps/rejected": -4.468255519866943, "loss": 0.6056, "nll_loss": 0.5898239016532898, "rewards/accuracies": 1.0, "rewards/chosen": -0.19259653985500336, "rewards/margins": 0.25422900915145874, "rewards/rejected": -0.4468255639076233, "step": 3169 }, { "epoch": 8.678986995208762, "grad_norm": 4.503649711608887, "learning_rate": 5.657534246575342e-07, "log_odds_chosen": 2.8284149169921875, "log_odds_ratio": -0.36135220527648926, "logits/chosen": 0.9423738718032837, "logits/rejected": 0.9114512205123901, "logps/chosen": -1.947306752204895, "logps/rejected": -4.660597324371338, "loss": 0.6988, "nll_loss": 0.6626761555671692, "rewards/accuracies": 0.875, "rewards/chosen": -0.19473066926002502, "rewards/margins": 0.27132904529571533, "rewards/rejected": -0.46605971455574036, "step": 3170 }, { "epoch": 8.681724845995893, "grad_norm": 3.5722053050994873, "learning_rate": 5.656164383561644e-07, "log_odds_chosen": 3.284302234649658, "log_odds_ratio": -0.1006428524851799, "logits/chosen": 1.1737245321273804, "logits/rejected": 1.2464183568954468, "logps/chosen": -1.6861355304718018, "logps/rejected": -4.742786407470703, "loss": 0.4927, "nll_loss": 0.4826117753982544, "rewards/accuracies": 1.0, "rewards/chosen": -0.16861355304718018, "rewards/margins": 0.30566510558128357, "rewards/rejected": -0.47427862882614136, "step": 3171 }, { "epoch": 8.684462696783026, "grad_norm": 10.16169548034668, "learning_rate": 5.654794520547945e-07, "log_odds_chosen": 1.3365066051483154, "log_odds_ratio": -1.1095887422561646, "logits/chosen": 0.8625728487968445, "logits/rejected": 0.8205151557922363, "logps/chosen": -3.397921085357666, "logps/rejected": -4.652205467224121, "loss": 0.8528, "nll_loss": 0.7418071031570435, "rewards/accuracies": 0.75, "rewards/chosen": -0.3397921025753021, "rewards/margins": 0.1254284381866455, "rewards/rejected": -0.46522054076194763, "step": 3172 }, { "epoch": 8.687200547570157, "grad_norm": 3.64009165763855, "learning_rate": 5.653424657534246e-07, "log_odds_chosen": 2.2930006980895996, "log_odds_ratio": -0.2523256540298462, "logits/chosen": 1.23867666721344, "logits/rejected": 1.2144089937210083, "logps/chosen": -1.8753726482391357, "logps/rejected": -3.9099621772766113, "loss": 0.5639, "nll_loss": 0.5387056469917297, "rewards/accuracies": 0.875, "rewards/chosen": -0.18753725290298462, "rewards/margins": 0.20345893502235413, "rewards/rejected": -0.39099621772766113, "step": 3173 }, { "epoch": 8.68993839835729, "grad_norm": 3.6108617782592773, "learning_rate": 5.652054794520548e-07, "log_odds_chosen": 2.528848171234131, "log_odds_ratio": -0.16618917882442474, "logits/chosen": 0.8326141834259033, "logits/rejected": 0.7858383655548096, "logps/chosen": -1.5955674648284912, "logps/rejected": -3.8958616256713867, "loss": 0.7007, "nll_loss": 0.6840654611587524, "rewards/accuracies": 1.0, "rewards/chosen": -0.15955676138401031, "rewards/margins": 0.2300294041633606, "rewards/rejected": -0.3895861506462097, "step": 3174 }, { "epoch": 8.692676249144421, "grad_norm": 3.4106485843658447, "learning_rate": 5.65068493150685e-07, "log_odds_chosen": 2.433443069458008, "log_odds_ratio": -0.21268437802791595, "logits/chosen": 0.7467865347862244, "logits/rejected": 0.6530283093452454, "logps/chosen": -1.4510810375213623, "logps/rejected": -3.677426815032959, "loss": 0.6828, "nll_loss": 0.6615529656410217, "rewards/accuracies": 1.0, "rewards/chosen": -0.14510810375213623, "rewards/margins": 0.22263458371162415, "rewards/rejected": -0.3677426874637604, "step": 3175 }, { "epoch": 8.695414099931554, "grad_norm": 4.545915126800537, "learning_rate": 5.64931506849315e-07, "log_odds_chosen": 1.101871132850647, "log_odds_ratio": -0.4856365919113159, "logits/chosen": 0.7884161472320557, "logits/rejected": 0.7734308242797852, "logps/chosen": -2.4659676551818848, "logps/rejected": -3.4986114501953125, "loss": 0.7032, "nll_loss": 0.654672384262085, "rewards/accuracies": 0.75, "rewards/chosen": -0.2465967833995819, "rewards/margins": 0.10326438397169113, "rewards/rejected": -0.34986117482185364, "step": 3176 }, { "epoch": 8.698151950718685, "grad_norm": 2.9660189151763916, "learning_rate": 5.647945205479452e-07, "log_odds_chosen": 2.5495803356170654, "log_odds_ratio": -0.19493955373764038, "logits/chosen": 0.6139036417007446, "logits/rejected": 0.5946640968322754, "logps/chosen": -1.942428708076477, "logps/rejected": -4.365263938903809, "loss": 0.6943, "nll_loss": 0.6747705340385437, "rewards/accuracies": 1.0, "rewards/chosen": -0.19424287974834442, "rewards/margins": 0.24228349328041077, "rewards/rejected": -0.436526358127594, "step": 3177 }, { "epoch": 8.700889801505818, "grad_norm": 3.169863700866699, "learning_rate": 5.646575342465754e-07, "log_odds_chosen": 2.3228113651275635, "log_odds_ratio": -0.26552000641822815, "logits/chosen": 0.9407770037651062, "logits/rejected": 0.9072622060775757, "logps/chosen": -2.0419671535491943, "logps/rejected": -4.244863510131836, "loss": 0.6252, "nll_loss": 0.5986484289169312, "rewards/accuracies": 0.875, "rewards/chosen": -0.2041967362165451, "rewards/margins": 0.22028960287570953, "rewards/rejected": -0.424486368894577, "step": 3178 }, { "epoch": 8.70362765229295, "grad_norm": 3.4765076637268066, "learning_rate": 5.645205479452054e-07, "log_odds_chosen": 1.5139219760894775, "log_odds_ratio": -0.2806800603866577, "logits/chosen": 0.6264585256576538, "logits/rejected": 0.6330077052116394, "logps/chosen": -2.0578274726867676, "logps/rejected": -3.434847354888916, "loss": 0.6207, "nll_loss": 0.5925964713096619, "rewards/accuracies": 1.0, "rewards/chosen": -0.2057827115058899, "rewards/margins": 0.13770204782485962, "rewards/rejected": -0.3434847593307495, "step": 3179 }, { "epoch": 8.706365503080082, "grad_norm": 2.941877841949463, "learning_rate": 5.643835616438356e-07, "log_odds_chosen": 4.277688026428223, "log_odds_ratio": -0.07938839495182037, "logits/chosen": 1.0627769231796265, "logits/rejected": 1.110405445098877, "logps/chosen": -2.0752811431884766, "logps/rejected": -6.16079568862915, "loss": 0.6151, "nll_loss": 0.6071668863296509, "rewards/accuracies": 1.0, "rewards/chosen": -0.20752814412117004, "rewards/margins": 0.408551424741745, "rewards/rejected": -0.616079568862915, "step": 3180 }, { "epoch": 8.709103353867214, "grad_norm": 4.925800323486328, "learning_rate": 5.642465753424658e-07, "log_odds_chosen": 1.570281982421875, "log_odds_ratio": -0.5652043223381042, "logits/chosen": 0.6680143475532532, "logits/rejected": 0.7165035009384155, "logps/chosen": -2.291958808898926, "logps/rejected": -3.7193589210510254, "loss": 0.7248, "nll_loss": 0.6682814955711365, "rewards/accuracies": 0.875, "rewards/chosen": -0.22919586300849915, "rewards/margins": 0.14274004101753235, "rewards/rejected": -0.3719359040260315, "step": 3181 }, { "epoch": 8.711841204654347, "grad_norm": 5.276787281036377, "learning_rate": 5.641095890410958e-07, "log_odds_chosen": 1.1146607398986816, "log_odds_ratio": -0.5935637354850769, "logits/chosen": 0.9966768622398376, "logits/rejected": 0.9479379653930664, "logps/chosen": -1.9742881059646606, "logps/rejected": -3.0060932636260986, "loss": 0.7083, "nll_loss": 0.6489830017089844, "rewards/accuracies": 0.625, "rewards/chosen": -0.19742880761623383, "rewards/margins": 0.10318049788475037, "rewards/rejected": -0.3006093204021454, "step": 3182 }, { "epoch": 8.714579055441478, "grad_norm": 2.9671242237091064, "learning_rate": 5.63972602739726e-07, "log_odds_chosen": 3.510488986968994, "log_odds_ratio": -0.06320226192474365, "logits/chosen": 1.1696252822875977, "logits/rejected": 1.1992077827453613, "logps/chosen": -2.0931336879730225, "logps/rejected": -5.4182891845703125, "loss": 0.5939, "nll_loss": 0.5876220464706421, "rewards/accuracies": 1.0, "rewards/chosen": -0.20931336283683777, "rewards/margins": 0.33251556754112244, "rewards/rejected": -0.5418289303779602, "step": 3183 }, { "epoch": 8.71731690622861, "grad_norm": 3.840679407119751, "learning_rate": 5.638356164383561e-07, "log_odds_chosen": 2.5057735443115234, "log_odds_ratio": -0.2642897963523865, "logits/chosen": 0.7685332298278809, "logits/rejected": 0.7608759999275208, "logps/chosen": -2.1192212104797363, "logps/rejected": -4.430064678192139, "loss": 0.6584, "nll_loss": 0.6319878101348877, "rewards/accuracies": 0.875, "rewards/chosen": -0.21192213892936707, "rewards/margins": 0.23108433187007904, "rewards/rejected": -0.4430064558982849, "step": 3184 }, { "epoch": 8.720054757015742, "grad_norm": 4.3745293617248535, "learning_rate": 5.636986301369863e-07, "log_odds_chosen": 1.6884303092956543, "log_odds_ratio": -0.4983781576156616, "logits/chosen": 1.029402732849121, "logits/rejected": 1.0304431915283203, "logps/chosen": -3.162579298019409, "logps/rejected": -4.785688400268555, "loss": 0.7415, "nll_loss": 0.6916905641555786, "rewards/accuracies": 0.875, "rewards/chosen": -0.31625792384147644, "rewards/margins": 0.1623108685016632, "rewards/rejected": -0.47856882214546204, "step": 3185 }, { "epoch": 8.722792607802875, "grad_norm": 6.514035701751709, "learning_rate": 5.635616438356164e-07, "log_odds_chosen": 0.12535230815410614, "log_odds_ratio": -0.9578367471694946, "logits/chosen": 0.9362421035766602, "logits/rejected": 0.9595997929573059, "logps/chosen": -3.3264551162719727, "logps/rejected": -3.402160167694092, "loss": 0.8274, "nll_loss": 0.7316462397575378, "rewards/accuracies": 0.75, "rewards/chosen": -0.3326454758644104, "rewards/margins": 0.007570507004857063, "rewards/rejected": -0.3402160108089447, "step": 3186 }, { "epoch": 8.725530458590008, "grad_norm": 3.11323618888855, "learning_rate": 5.634246575342465e-07, "log_odds_chosen": 2.2206459045410156, "log_odds_ratio": -0.1869678497314453, "logits/chosen": 0.871686577796936, "logits/rejected": 0.8159409761428833, "logps/chosen": -1.6744242906570435, "logps/rejected": -3.702375650405884, "loss": 0.5906, "nll_loss": 0.571941614151001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1674424409866333, "rewards/margins": 0.202795147895813, "rewards/rejected": -0.3702375888824463, "step": 3187 }, { "epoch": 8.728268309377139, "grad_norm": 3.084040403366089, "learning_rate": 5.632876712328767e-07, "log_odds_chosen": 2.382610559463501, "log_odds_ratio": -0.29124775528907776, "logits/chosen": 0.883112370967865, "logits/rejected": 0.8973871469497681, "logps/chosen": -2.1609296798706055, "logps/rejected": -4.4470367431640625, "loss": 0.7371, "nll_loss": 0.7079618573188782, "rewards/accuracies": 0.875, "rewards/chosen": -0.21609295904636383, "rewards/margins": 0.22861075401306152, "rewards/rejected": -0.44470372796058655, "step": 3188 }, { "epoch": 8.73100616016427, "grad_norm": 3.409264087677002, "learning_rate": 5.631506849315069e-07, "log_odds_chosen": 2.7990314960479736, "log_odds_ratio": -0.16514819860458374, "logits/chosen": 0.6339519023895264, "logits/rejected": 0.5879687070846558, "logps/chosen": -2.8014235496520996, "logps/rejected": -5.484735012054443, "loss": 0.869, "nll_loss": 0.8524454832077026, "rewards/accuracies": 1.0, "rewards/chosen": -0.2801423966884613, "rewards/margins": 0.2683311700820923, "rewards/rejected": -0.5484734773635864, "step": 3189 }, { "epoch": 8.733744010951403, "grad_norm": 3.210475206375122, "learning_rate": 5.630136986301369e-07, "log_odds_chosen": 2.042534589767456, "log_odds_ratio": -0.2237112820148468, "logits/chosen": 0.7153945565223694, "logits/rejected": 0.654079258441925, "logps/chosen": -1.6391937732696533, "logps/rejected": -3.4891114234924316, "loss": 0.6797, "nll_loss": 0.6573176383972168, "rewards/accuracies": 1.0, "rewards/chosen": -0.1639193892478943, "rewards/margins": 0.1849917471408844, "rewards/rejected": -0.3489111363887787, "step": 3190 }, { "epoch": 8.736481861738536, "grad_norm": 2.7792587280273438, "learning_rate": 5.628767123287671e-07, "log_odds_chosen": 2.3389205932617188, "log_odds_ratio": -0.19407859444618225, "logits/chosen": 1.0608819723129272, "logits/rejected": 1.0831100940704346, "logps/chosen": -1.7120044231414795, "logps/rejected": -3.8892478942871094, "loss": 0.5927, "nll_loss": 0.5732665061950684, "rewards/accuracies": 1.0, "rewards/chosen": -0.1712004393339157, "rewards/margins": 0.21772436797618866, "rewards/rejected": -0.388924777507782, "step": 3191 }, { "epoch": 8.739219712525667, "grad_norm": 3.506181478500366, "learning_rate": 5.627397260273973e-07, "log_odds_chosen": 1.9218778610229492, "log_odds_ratio": -0.37671998143196106, "logits/chosen": 0.675229549407959, "logits/rejected": 0.6873055696487427, "logps/chosen": -1.5698192119598389, "logps/rejected": -3.3092799186706543, "loss": 0.5903, "nll_loss": 0.5526338815689087, "rewards/accuracies": 0.875, "rewards/chosen": -0.1569819301366806, "rewards/margins": 0.1739460676908493, "rewards/rejected": -0.3309280276298523, "step": 3192 }, { "epoch": 8.7419575633128, "grad_norm": 3.1881766319274902, "learning_rate": 5.626027397260273e-07, "log_odds_chosen": 2.7313756942749023, "log_odds_ratio": -0.18934589624404907, "logits/chosen": 0.9568184614181519, "logits/rejected": 0.9782739281654358, "logps/chosen": -1.513416051864624, "logps/rejected": -3.813046455383301, "loss": 0.6119, "nll_loss": 0.5929523706436157, "rewards/accuracies": 0.875, "rewards/chosen": -0.15134160220623016, "rewards/margins": 0.229963019490242, "rewards/rejected": -0.38130462169647217, "step": 3193 }, { "epoch": 8.744695414099931, "grad_norm": 7.993833541870117, "learning_rate": 5.624657534246575e-07, "log_odds_chosen": 1.347949743270874, "log_odds_ratio": -0.6303212642669678, "logits/chosen": 0.975853681564331, "logits/rejected": 0.957277774810791, "logps/chosen": -2.5951664447784424, "logps/rejected": -3.7036631107330322, "loss": 0.6992, "nll_loss": 0.636208176612854, "rewards/accuracies": 0.75, "rewards/chosen": -0.2595166563987732, "rewards/margins": 0.11084967106580734, "rewards/rejected": -0.37036633491516113, "step": 3194 }, { "epoch": 8.747433264887064, "grad_norm": 3.5649290084838867, "learning_rate": 5.623287671232877e-07, "log_odds_chosen": 1.874996304512024, "log_odds_ratio": -0.26815280318260193, "logits/chosen": 0.7150171995162964, "logits/rejected": 0.7229421138763428, "logps/chosen": -2.1956663131713867, "logps/rejected": -3.9345004558563232, "loss": 0.7138, "nll_loss": 0.6870214939117432, "rewards/accuracies": 1.0, "rewards/chosen": -0.21956664323806763, "rewards/margins": 0.17388342320919037, "rewards/rejected": -0.3934500515460968, "step": 3195 }, { "epoch": 8.750171115674195, "grad_norm": 4.487374782562256, "learning_rate": 5.621917808219177e-07, "log_odds_chosen": 3.502567768096924, "log_odds_ratio": -0.3278183937072754, "logits/chosen": 1.2776687145233154, "logits/rejected": 1.2523295879364014, "logps/chosen": -2.259261131286621, "logps/rejected": -5.604539394378662, "loss": 0.6474, "nll_loss": 0.614591121673584, "rewards/accuracies": 0.875, "rewards/chosen": -0.22592613101005554, "rewards/margins": 0.3345278203487396, "rewards/rejected": -0.5604540109634399, "step": 3196 }, { "epoch": 8.752908966461328, "grad_norm": 7.094378471374512, "learning_rate": 5.620547945205479e-07, "log_odds_chosen": 2.538102149963379, "log_odds_ratio": -0.4249943494796753, "logits/chosen": 1.1487857103347778, "logits/rejected": 1.0833786725997925, "logps/chosen": -1.722068190574646, "logps/rejected": -4.005088806152344, "loss": 0.5901, "nll_loss": 0.5475745797157288, "rewards/accuracies": 0.75, "rewards/chosen": -0.1722068339586258, "rewards/margins": 0.2283020317554474, "rewards/rejected": -0.400508850812912, "step": 3197 }, { "epoch": 8.75564681724846, "grad_norm": 3.8424673080444336, "learning_rate": 5.61917808219178e-07, "log_odds_chosen": 2.4340248107910156, "log_odds_ratio": -0.2092980295419693, "logits/chosen": 1.0573898553848267, "logits/rejected": 1.115415096282959, "logps/chosen": -1.7071304321289062, "logps/rejected": -3.886035442352295, "loss": 0.5155, "nll_loss": 0.4945213496685028, "rewards/accuracies": 1.0, "rewards/chosen": -0.17071306705474854, "rewards/margins": 0.21789048612117767, "rewards/rejected": -0.388603538274765, "step": 3198 }, { "epoch": 8.758384668035593, "grad_norm": 3.56439208984375, "learning_rate": 5.617808219178082e-07, "log_odds_chosen": 1.5448856353759766, "log_odds_ratio": -0.2719203531742096, "logits/chosen": 0.848800778388977, "logits/rejected": 0.7959336042404175, "logps/chosen": -1.3648743629455566, "logps/rejected": -2.7081127166748047, "loss": 0.6249, "nll_loss": 0.5977027416229248, "rewards/accuracies": 1.0, "rewards/chosen": -0.1364874392747879, "rewards/margins": 0.13432380557060242, "rewards/rejected": -0.2708112597465515, "step": 3199 }, { "epoch": 8.761122518822724, "grad_norm": 3.1006076335906982, "learning_rate": 5.616438356164383e-07, "log_odds_chosen": 3.6182453632354736, "log_odds_ratio": -0.09064232558012009, "logits/chosen": 1.2300844192504883, "logits/rejected": 1.262436032295227, "logps/chosen": -2.0371620655059814, "logps/rejected": -5.429835319519043, "loss": 0.5712, "nll_loss": 0.5621755123138428, "rewards/accuracies": 1.0, "rewards/chosen": -0.2037162184715271, "rewards/margins": 0.3392673134803772, "rewards/rejected": -0.5429835319519043, "step": 3200 }, { "epoch": 8.763860369609857, "grad_norm": 3.9453797340393066, "learning_rate": 5.615068493150684e-07, "log_odds_chosen": 1.188745379447937, "log_odds_ratio": -0.4509684443473816, "logits/chosen": 0.8667407035827637, "logits/rejected": 0.8595895171165466, "logps/chosen": -1.91249418258667, "logps/rejected": -3.022104501724243, "loss": 0.7747, "nll_loss": 0.729595422744751, "rewards/accuracies": 0.75, "rewards/chosen": -0.19124941527843475, "rewards/margins": 0.11096104979515076, "rewards/rejected": -0.3022104501724243, "step": 3201 }, { "epoch": 8.766598220396988, "grad_norm": 5.601293087005615, "learning_rate": 5.613698630136986e-07, "log_odds_chosen": 1.328066110610962, "log_odds_ratio": -0.3227166533470154, "logits/chosen": 1.0350010395050049, "logits/rejected": 0.9564050436019897, "logps/chosen": -2.212247371673584, "logps/rejected": -3.4523518085479736, "loss": 0.683, "nll_loss": 0.6507112979888916, "rewards/accuracies": 0.875, "rewards/chosen": -0.22122472524642944, "rewards/margins": 0.12401046603918076, "rewards/rejected": -0.3452351987361908, "step": 3202 }, { "epoch": 8.76933607118412, "grad_norm": 3.484002113342285, "learning_rate": 5.612328767123287e-07, "log_odds_chosen": 3.100121021270752, "log_odds_ratio": -0.21395909786224365, "logits/chosen": 0.8643437027931213, "logits/rejected": 0.8552870750427246, "logps/chosen": -2.1214003562927246, "logps/rejected": -5.038928031921387, "loss": 0.6336, "nll_loss": 0.6121923327445984, "rewards/accuracies": 1.0, "rewards/chosen": -0.2121400237083435, "rewards/margins": 0.29175275564193726, "rewards/rejected": -0.5038927793502808, "step": 3203 }, { "epoch": 8.772073921971252, "grad_norm": 3.0520033836364746, "learning_rate": 5.610958904109588e-07, "log_odds_chosen": 1.8986384868621826, "log_odds_ratio": -0.25930824875831604, "logits/chosen": 0.9982274770736694, "logits/rejected": 0.9567288160324097, "logps/chosen": -1.5723669528961182, "logps/rejected": -3.304988384246826, "loss": 0.5933, "nll_loss": 0.5674042701721191, "rewards/accuracies": 1.0, "rewards/chosen": -0.157236710190773, "rewards/margins": 0.17326214909553528, "rewards/rejected": -0.3304988741874695, "step": 3204 }, { "epoch": 8.774811772758385, "grad_norm": 3.1409621238708496, "learning_rate": 5.60958904109589e-07, "log_odds_chosen": 4.127392292022705, "log_odds_ratio": -0.11091878265142441, "logits/chosen": 0.9842305183410645, "logits/rejected": 0.9130921363830566, "logps/chosen": -1.8324952125549316, "logps/rejected": -5.731423854827881, "loss": 0.7338, "nll_loss": 0.722710907459259, "rewards/accuracies": 1.0, "rewards/chosen": -0.18324953317642212, "rewards/margins": 0.3898928761482239, "rewards/rejected": -0.573142409324646, "step": 3205 }, { "epoch": 8.777549623545516, "grad_norm": 5.668036937713623, "learning_rate": 5.608219178082192e-07, "log_odds_chosen": 3.15891170501709, "log_odds_ratio": -0.6602843999862671, "logits/chosen": 0.8616719841957092, "logits/rejected": 0.9348951578140259, "logps/chosen": -3.137881278991699, "logps/rejected": -6.267996788024902, "loss": 0.7473, "nll_loss": 0.681256890296936, "rewards/accuracies": 0.75, "rewards/chosen": -0.31378817558288574, "rewards/margins": 0.3130115866661072, "rewards/rejected": -0.6267997026443481, "step": 3206 }, { "epoch": 8.780287474332649, "grad_norm": 2.9190592765808105, "learning_rate": 5.606849315068492e-07, "log_odds_chosen": 5.2856903076171875, "log_odds_ratio": -0.1339913308620453, "logits/chosen": 1.1211819648742676, "logits/rejected": 1.1191356182098389, "logps/chosen": -2.4013853073120117, "logps/rejected": -7.576521873474121, "loss": 0.6617, "nll_loss": 0.6483097672462463, "rewards/accuracies": 0.875, "rewards/chosen": -0.24013850092887878, "rewards/margins": 0.517513632774353, "rewards/rejected": -0.7576521635055542, "step": 3207 }, { "epoch": 8.78302532511978, "grad_norm": 3.3932816982269287, "learning_rate": 5.605479452054794e-07, "log_odds_chosen": 2.1375231742858887, "log_odds_ratio": -0.22011636197566986, "logits/chosen": 0.6683197617530823, "logits/rejected": 0.6072942614555359, "logps/chosen": -1.7764112949371338, "logps/rejected": -3.700937271118164, "loss": 0.6358, "nll_loss": 0.6137920022010803, "rewards/accuracies": 1.0, "rewards/chosen": -0.1776411235332489, "rewards/margins": 0.19245260953903198, "rewards/rejected": -0.3700937330722809, "step": 3208 }, { "epoch": 8.785763175906913, "grad_norm": 3.1125328540802, "learning_rate": 5.604109589041096e-07, "log_odds_chosen": 2.444841146469116, "log_odds_ratio": -0.23027914762496948, "logits/chosen": 0.821101188659668, "logits/rejected": 0.7704848051071167, "logps/chosen": -1.5272910594940186, "logps/rejected": -3.7837276458740234, "loss": 0.6228, "nll_loss": 0.5997974872589111, "rewards/accuracies": 1.0, "rewards/chosen": -0.1527291089296341, "rewards/margins": 0.22564367949962616, "rewards/rejected": -0.37837275862693787, "step": 3209 }, { "epoch": 8.788501026694044, "grad_norm": 3.9276115894317627, "learning_rate": 5.602739726027396e-07, "log_odds_chosen": 0.6511732339859009, "log_odds_ratio": -0.5572277307510376, "logits/chosen": 0.8125641942024231, "logits/rejected": 0.8146158456802368, "logps/chosen": -2.2705719470977783, "logps/rejected": -2.747166872024536, "loss": 0.6209, "nll_loss": 0.5651343464851379, "rewards/accuracies": 0.875, "rewards/chosen": -0.22705718874931335, "rewards/margins": 0.0476594939827919, "rewards/rejected": -0.27471670508384705, "step": 3210 }, { "epoch": 8.791238877481177, "grad_norm": 3.099233388900757, "learning_rate": 5.601369863013698e-07, "log_odds_chosen": 4.000701904296875, "log_odds_ratio": -0.040907979011535645, "logits/chosen": 0.9866213798522949, "logits/rejected": 1.0267853736877441, "logps/chosen": -2.3361399173736572, "logps/rejected": -6.187288761138916, "loss": 0.6601, "nll_loss": 0.65604567527771, "rewards/accuracies": 1.0, "rewards/chosen": -0.2336139976978302, "rewards/margins": 0.3851149082183838, "rewards/rejected": -0.6187289357185364, "step": 3211 }, { "epoch": 8.793976728268309, "grad_norm": 3.184641122817993, "learning_rate": 5.6e-07, "log_odds_chosen": 1.2420995235443115, "log_odds_ratio": -0.3635571300983429, "logits/chosen": 0.8576717376708984, "logits/rejected": 0.8955543041229248, "logps/chosen": -1.8629169464111328, "logps/rejected": -2.9763033390045166, "loss": 0.6545, "nll_loss": 0.6181830763816833, "rewards/accuracies": 0.875, "rewards/chosen": -0.18629169464111328, "rewards/margins": 0.11133866012096405, "rewards/rejected": -0.2976303696632385, "step": 3212 }, { "epoch": 8.796714579055442, "grad_norm": 3.3599531650543213, "learning_rate": 5.598630136986301e-07, "log_odds_chosen": 2.627354145050049, "log_odds_ratio": -0.18613767623901367, "logits/chosen": 0.9291072487831116, "logits/rejected": 0.9160245656967163, "logps/chosen": -2.196763515472412, "logps/rejected": -4.682306289672852, "loss": 0.6047, "nll_loss": 0.5861077904701233, "rewards/accuracies": 1.0, "rewards/chosen": -0.21967636048793793, "rewards/margins": 0.2485542893409729, "rewards/rejected": -0.468230664730072, "step": 3213 }, { "epoch": 8.799452429842574, "grad_norm": 2.9320783615112305, "learning_rate": 5.597260273972602e-07, "log_odds_chosen": 4.120182514190674, "log_odds_ratio": -0.04279708117246628, "logits/chosen": 0.745369553565979, "logits/rejected": 0.7303649187088013, "logps/chosen": -1.5554230213165283, "logps/rejected": -5.327396392822266, "loss": 0.717, "nll_loss": 0.7127035856246948, "rewards/accuracies": 1.0, "rewards/chosen": -0.1555423140525818, "rewards/margins": 0.3771973252296448, "rewards/rejected": -0.5327396392822266, "step": 3214 }, { "epoch": 8.802190280629706, "grad_norm": 3.904062509536743, "learning_rate": 5.595890410958903e-07, "log_odds_chosen": 2.3713154792785645, "log_odds_ratio": -0.28950321674346924, "logits/chosen": 1.1794244050979614, "logits/rejected": 1.1838645935058594, "logps/chosen": -2.2759482860565186, "logps/rejected": -4.498509407043457, "loss": 0.7004, "nll_loss": 0.6714257001876831, "rewards/accuracies": 0.875, "rewards/chosen": -0.22759482264518738, "rewards/margins": 0.22225616872310638, "rewards/rejected": -0.44985097646713257, "step": 3215 }, { "epoch": 8.804928131416839, "grad_norm": 2.8839499950408936, "learning_rate": 5.594520547945205e-07, "log_odds_chosen": 2.0741677284240723, "log_odds_ratio": -0.20020931959152222, "logits/chosen": 0.9854384660720825, "logits/rejected": 0.9542315602302551, "logps/chosen": -1.7879953384399414, "logps/rejected": -3.6721410751342773, "loss": 0.6143, "nll_loss": 0.5942383408546448, "rewards/accuracies": 1.0, "rewards/chosen": -0.17879952490329742, "rewards/margins": 0.1884145736694336, "rewards/rejected": -0.3672140836715698, "step": 3216 }, { "epoch": 8.80766598220397, "grad_norm": 2.987236261367798, "learning_rate": 5.593150684931506e-07, "log_odds_chosen": 3.148423194885254, "log_odds_ratio": -0.22553102672100067, "logits/chosen": 1.037724256515503, "logits/rejected": 1.014891505241394, "logps/chosen": -2.2649970054626465, "logps/rejected": -5.234301567077637, "loss": 0.5956, "nll_loss": 0.5730646252632141, "rewards/accuracies": 1.0, "rewards/chosen": -0.22649972140789032, "rewards/margins": 0.29693037271499634, "rewards/rejected": -0.5234301090240479, "step": 3217 }, { "epoch": 8.810403832991103, "grad_norm": 7.649346351623535, "learning_rate": 5.591780821917807e-07, "log_odds_chosen": 1.2946150302886963, "log_odds_ratio": -0.6240147352218628, "logits/chosen": 0.7671282887458801, "logits/rejected": 0.7221681475639343, "logps/chosen": -3.8412632942199707, "logps/rejected": -5.079869747161865, "loss": 0.8495, "nll_loss": 0.7871401309967041, "rewards/accuracies": 0.625, "rewards/chosen": -0.38412633538246155, "rewards/margins": 0.12386065721511841, "rewards/rejected": -0.5079870223999023, "step": 3218 }, { "epoch": 8.813141683778234, "grad_norm": 3.433098793029785, "learning_rate": 5.590410958904109e-07, "log_odds_chosen": 2.730473518371582, "log_odds_ratio": -0.20382800698280334, "logits/chosen": 1.0923887491226196, "logits/rejected": 1.1558027267456055, "logps/chosen": -2.0443122386932373, "logps/rejected": -4.637078285217285, "loss": 0.5902, "nll_loss": 0.5697970390319824, "rewards/accuracies": 1.0, "rewards/chosen": -0.20443125069141388, "rewards/margins": 0.2592766284942627, "rewards/rejected": -0.4637078642845154, "step": 3219 }, { "epoch": 8.815879534565367, "grad_norm": 3.8727996349334717, "learning_rate": 5.589041095890411e-07, "log_odds_chosen": 1.5142275094985962, "log_odds_ratio": -0.2924204468727112, "logits/chosen": 1.0241236686706543, "logits/rejected": 1.0221991539001465, "logps/chosen": -2.5872206687927246, "logps/rejected": -4.021365165710449, "loss": 0.6111, "nll_loss": 0.5818802118301392, "rewards/accuracies": 0.875, "rewards/chosen": -0.25872206687927246, "rewards/margins": 0.1434144675731659, "rewards/rejected": -0.40213653445243835, "step": 3220 }, { "epoch": 8.818617385352498, "grad_norm": 5.244020938873291, "learning_rate": 5.587671232876711e-07, "log_odds_chosen": 1.883953332901001, "log_odds_ratio": -0.2577663064002991, "logits/chosen": 1.0584180355072021, "logits/rejected": 1.0340900421142578, "logps/chosen": -2.4729413986206055, "logps/rejected": -4.245815277099609, "loss": 0.6666, "nll_loss": 0.6408716440200806, "rewards/accuracies": 0.875, "rewards/chosen": -0.24729414284229279, "rewards/margins": 0.17728736996650696, "rewards/rejected": -0.42458152770996094, "step": 3221 }, { "epoch": 8.821355236139631, "grad_norm": 3.1598188877105713, "learning_rate": 5.586301369863013e-07, "log_odds_chosen": 2.3010191917419434, "log_odds_ratio": -0.22404368221759796, "logits/chosen": 1.0635254383087158, "logits/rejected": 1.0921741724014282, "logps/chosen": -1.9095731973648071, "logps/rejected": -4.072290420532227, "loss": 0.5575, "nll_loss": 0.5351449251174927, "rewards/accuracies": 1.0, "rewards/chosen": -0.19095732271671295, "rewards/margins": 0.2162717878818512, "rewards/rejected": -0.40722912549972534, "step": 3222 }, { "epoch": 8.824093086926762, "grad_norm": 4.724067211151123, "learning_rate": 5.584931506849316e-07, "log_odds_chosen": 0.6312904357910156, "log_odds_ratio": -0.6396549344062805, "logits/chosen": 0.9217855930328369, "logits/rejected": 0.8282713294029236, "logps/chosen": -1.9397823810577393, "logps/rejected": -2.49294376373291, "loss": 0.67, "nll_loss": 0.6059984564781189, "rewards/accuracies": 0.75, "rewards/chosen": -0.1939782351255417, "rewards/margins": 0.055316150188446045, "rewards/rejected": -0.24929438531398773, "step": 3223 }, { "epoch": 8.826830937713895, "grad_norm": 3.587263345718384, "learning_rate": 5.583561643835615e-07, "log_odds_chosen": 4.153482913970947, "log_odds_ratio": -0.12857046723365784, "logits/chosen": 1.0688666105270386, "logits/rejected": 1.081821322441101, "logps/chosen": -2.331691265106201, "logps/rejected": -6.361433029174805, "loss": 0.6601, "nll_loss": 0.6472065448760986, "rewards/accuracies": 1.0, "rewards/chosen": -0.23316910862922668, "rewards/margins": 0.4029742181301117, "rewards/rejected": -0.6361433267593384, "step": 3224 }, { "epoch": 8.829568788501026, "grad_norm": 3.707150459289551, "learning_rate": 5.582191780821918e-07, "log_odds_chosen": 1.4269911050796509, "log_odds_ratio": -0.32885387539863586, "logits/chosen": 0.7007823586463928, "logits/rejected": 0.6675304174423218, "logps/chosen": -1.7779589891433716, "logps/rejected": -3.065249443054199, "loss": 0.6529, "nll_loss": 0.619995653629303, "rewards/accuracies": 0.875, "rewards/chosen": -0.1777958869934082, "rewards/margins": 0.12872906029224396, "rewards/rejected": -0.30652493238449097, "step": 3225 }, { "epoch": 8.83230663928816, "grad_norm": 3.3844504356384277, "learning_rate": 5.58082191780822e-07, "log_odds_chosen": 2.075655460357666, "log_odds_ratio": -0.2808542847633362, "logits/chosen": 0.8351436257362366, "logits/rejected": 0.7402687072753906, "logps/chosen": -1.8188260793685913, "logps/rejected": -3.778862953186035, "loss": 0.6378, "nll_loss": 0.6097081303596497, "rewards/accuracies": 0.875, "rewards/chosen": -0.1818826049566269, "rewards/margins": 0.19600369036197662, "rewards/rejected": -0.3778862953186035, "step": 3226 }, { "epoch": 8.83504449007529, "grad_norm": 5.001554489135742, "learning_rate": 5.579452054794521e-07, "log_odds_chosen": 2.4287962913513184, "log_odds_ratio": -0.18968504667282104, "logits/chosen": 0.7994170784950256, "logits/rejected": 0.7512435913085938, "logps/chosen": -1.9588680267333984, "logps/rejected": -4.189950466156006, "loss": 0.6114, "nll_loss": 0.5924553275108337, "rewards/accuracies": 1.0, "rewards/chosen": -0.19588680565357208, "rewards/margins": 0.22310824692249298, "rewards/rejected": -0.41899505257606506, "step": 3227 }, { "epoch": 8.837782340862423, "grad_norm": 3.5208473205566406, "learning_rate": 5.578082191780822e-07, "log_odds_chosen": 2.0090441703796387, "log_odds_ratio": -0.3192135989665985, "logits/chosen": 0.7455791234970093, "logits/rejected": 0.6650092601776123, "logps/chosen": -1.5669593811035156, "logps/rejected": -3.43945050239563, "loss": 0.6547, "nll_loss": 0.6227301359176636, "rewards/accuracies": 1.0, "rewards/chosen": -0.15669593214988708, "rewards/margins": 0.18724912405014038, "rewards/rejected": -0.34394508600234985, "step": 3228 }, { "epoch": 8.840520191649555, "grad_norm": 3.2711386680603027, "learning_rate": 5.576712328767123e-07, "log_odds_chosen": 2.398561954498291, "log_odds_ratio": -0.26886630058288574, "logits/chosen": 0.8559529185295105, "logits/rejected": 0.7813922762870789, "logps/chosen": -2.1273412704467773, "logps/rejected": -4.390877723693848, "loss": 0.6316, "nll_loss": 0.604724645614624, "rewards/accuracies": 0.875, "rewards/chosen": -0.2127341330051422, "rewards/margins": 0.22635364532470703, "rewards/rejected": -0.43908774852752686, "step": 3229 }, { "epoch": 8.843258042436688, "grad_norm": 7.2319440841674805, "learning_rate": 5.575342465753425e-07, "log_odds_chosen": 1.2972004413604736, "log_odds_ratio": -0.5527492761611938, "logits/chosen": 0.7483303546905518, "logits/rejected": 0.6243556141853333, "logps/chosen": -1.8173680305480957, "logps/rejected": -2.9154462814331055, "loss": 0.6706, "nll_loss": 0.6152778267860413, "rewards/accuracies": 0.875, "rewards/chosen": -0.18173682689666748, "rewards/margins": 0.1098078116774559, "rewards/rejected": -0.2915446162223816, "step": 3230 }, { "epoch": 8.845995893223819, "grad_norm": 4.277912139892578, "learning_rate": 5.573972602739726e-07, "log_odds_chosen": 1.1296870708465576, "log_odds_ratio": -0.5403106808662415, "logits/chosen": 0.7433964014053345, "logits/rejected": 0.7029904723167419, "logps/chosen": -2.262890100479126, "logps/rejected": -3.290403366088867, "loss": 0.6637, "nll_loss": 0.6097166538238525, "rewards/accuracies": 0.875, "rewards/chosen": -0.2262890338897705, "rewards/margins": 0.10275131464004517, "rewards/rejected": -0.3290403485298157, "step": 3231 }, { "epoch": 8.848733744010952, "grad_norm": 3.9553425312042236, "learning_rate": 5.572602739726027e-07, "log_odds_chosen": 2.258544921875, "log_odds_ratio": -0.17596295475959778, "logits/chosen": 0.7555134892463684, "logits/rejected": 0.7309106588363647, "logps/chosen": -1.9513500928878784, "logps/rejected": -3.897089719772339, "loss": 0.6086, "nll_loss": 0.5909774303436279, "rewards/accuracies": 1.0, "rewards/chosen": -0.19513501226902008, "rewards/margins": 0.19457395374774933, "rewards/rejected": -0.3897089660167694, "step": 3232 }, { "epoch": 8.851471594798083, "grad_norm": 3.0286357402801514, "learning_rate": 5.571232876712329e-07, "log_odds_chosen": 3.9343976974487305, "log_odds_ratio": -0.1974228024482727, "logits/chosen": 0.9182400703430176, "logits/rejected": 0.9595615863800049, "logps/chosen": -2.651630401611328, "logps/rejected": -6.477710723876953, "loss": 0.6978, "nll_loss": 0.678014874458313, "rewards/accuracies": 1.0, "rewards/chosen": -0.2651630640029907, "rewards/margins": 0.3826080560684204, "rewards/rejected": -0.6477710604667664, "step": 3233 }, { "epoch": 8.854209445585216, "grad_norm": 3.3233964443206787, "learning_rate": 5.569863013698631e-07, "log_odds_chosen": 1.0320028066635132, "log_odds_ratio": -0.34063857793807983, "logits/chosen": 0.7147969603538513, "logits/rejected": 0.6627730131149292, "logps/chosen": -1.5936973094940186, "logps/rejected": -2.403496742248535, "loss": 0.6209, "nll_loss": 0.5868701338768005, "rewards/accuracies": 1.0, "rewards/chosen": -0.15936973690986633, "rewards/margins": 0.08097995072603226, "rewards/rejected": -0.240349680185318, "step": 3234 }, { "epoch": 8.856947296372347, "grad_norm": 2.946455717086792, "learning_rate": 5.568493150684931e-07, "log_odds_chosen": 2.274766683578491, "log_odds_ratio": -0.15486429631710052, "logits/chosen": 1.067625641822815, "logits/rejected": 0.9888875484466553, "logps/chosen": -1.5184680223464966, "logps/rejected": -3.5809805393218994, "loss": 0.5667, "nll_loss": 0.5512605309486389, "rewards/accuracies": 1.0, "rewards/chosen": -0.15184679627418518, "rewards/margins": 0.20625124871730804, "rewards/rejected": -0.35809803009033203, "step": 3235 }, { "epoch": 8.85968514715948, "grad_norm": 3.353970766067505, "learning_rate": 5.567123287671233e-07, "log_odds_chosen": 2.0298526287078857, "log_odds_ratio": -0.2160625457763672, "logits/chosen": 0.862889289855957, "logits/rejected": 0.7880245447158813, "logps/chosen": -1.5004512071609497, "logps/rejected": -3.3149805068969727, "loss": 0.6185, "nll_loss": 0.5968810319900513, "rewards/accuracies": 1.0, "rewards/chosen": -0.15004514157772064, "rewards/margins": 0.1814529448747635, "rewards/rejected": -0.33149808645248413, "step": 3236 }, { "epoch": 8.862422997946611, "grad_norm": 3.1738455295562744, "learning_rate": 5.565753424657535e-07, "log_odds_chosen": 1.2113466262817383, "log_odds_ratio": -0.3556443452835083, "logits/chosen": 0.7911798357963562, "logits/rejected": 0.7068295478820801, "logps/chosen": -1.7392585277557373, "logps/rejected": -2.824924945831299, "loss": 0.5872, "nll_loss": 0.5516459345817566, "rewards/accuracies": 0.75, "rewards/chosen": -0.17392586171627045, "rewards/margins": 0.10856664925813675, "rewards/rejected": -0.2824925184249878, "step": 3237 }, { "epoch": 8.865160848733744, "grad_norm": 3.7974586486816406, "learning_rate": 5.564383561643835e-07, "log_odds_chosen": 2.3755877017974854, "log_odds_ratio": -0.31289535760879517, "logits/chosen": 0.88618004322052, "logits/rejected": 0.8838739395141602, "logps/chosen": -1.8678410053253174, "logps/rejected": -4.148260116577148, "loss": 0.6405, "nll_loss": 0.6091609001159668, "rewards/accuracies": 0.875, "rewards/chosen": -0.18678411841392517, "rewards/margins": 0.2280418872833252, "rewards/rejected": -0.41482600569725037, "step": 3238 }, { "epoch": 8.867898699520875, "grad_norm": 3.226870059967041, "learning_rate": 5.563013698630137e-07, "log_odds_chosen": 1.9038729667663574, "log_odds_ratio": -0.3244495689868927, "logits/chosen": 0.7333075404167175, "logits/rejected": 0.6700884699821472, "logps/chosen": -1.1840389966964722, "logps/rejected": -2.8307981491088867, "loss": 0.5976, "nll_loss": 0.5651204586029053, "rewards/accuracies": 1.0, "rewards/chosen": -0.11840389668941498, "rewards/margins": 0.16467593610286713, "rewards/rejected": -0.2830798327922821, "step": 3239 }, { "epoch": 8.870636550308008, "grad_norm": 3.4002938270568848, "learning_rate": 5.561643835616439e-07, "log_odds_chosen": 2.7063236236572266, "log_odds_ratio": -0.11872055381536484, "logits/chosen": 1.0250593423843384, "logits/rejected": 0.8934195637702942, "logps/chosen": -2.4289684295654297, "logps/rejected": -4.976999282836914, "loss": 0.7099, "nll_loss": 0.6979798078536987, "rewards/accuracies": 1.0, "rewards/chosen": -0.24289685487747192, "rewards/margins": 0.2548030614852905, "rewards/rejected": -0.49769991636276245, "step": 3240 }, { "epoch": 8.873374401095141, "grad_norm": 3.548063278198242, "learning_rate": 5.56027397260274e-07, "log_odds_chosen": 4.164107322692871, "log_odds_ratio": -0.08979648351669312, "logits/chosen": 0.8678101897239685, "logits/rejected": 0.9388002753257751, "logps/chosen": -1.6646108627319336, "logps/rejected": -5.578609943389893, "loss": 0.7271, "nll_loss": 0.7181169986724854, "rewards/accuracies": 1.0, "rewards/chosen": -0.16646109521389008, "rewards/margins": 0.39139991998672485, "rewards/rejected": -0.5578610301017761, "step": 3241 }, { "epoch": 8.876112251882272, "grad_norm": 4.631372451782227, "learning_rate": 5.558904109589041e-07, "log_odds_chosen": 1.8062634468078613, "log_odds_ratio": -0.46806660294532776, "logits/chosen": 0.9850348234176636, "logits/rejected": 1.045865774154663, "logps/chosen": -2.633347988128662, "logps/rejected": -4.276366233825684, "loss": 0.661, "nll_loss": 0.6141776442527771, "rewards/accuracies": 0.875, "rewards/chosen": -0.2633347809314728, "rewards/margins": 0.16430185735225677, "rewards/rejected": -0.42763662338256836, "step": 3242 }, { "epoch": 8.878850102669405, "grad_norm": 3.1602182388305664, "learning_rate": 5.557534246575343e-07, "log_odds_chosen": 1.8583515882492065, "log_odds_ratio": -0.1933910846710205, "logits/chosen": 0.8013640642166138, "logits/rejected": 0.8088179230690002, "logps/chosen": -2.5169789791107178, "logps/rejected": -4.264683723449707, "loss": 0.5797, "nll_loss": 0.5603435039520264, "rewards/accuracies": 1.0, "rewards/chosen": -0.2516978979110718, "rewards/margins": 0.1747705340385437, "rewards/rejected": -0.4264684319496155, "step": 3243 }, { "epoch": 8.881587953456537, "grad_norm": 3.4668214321136475, "learning_rate": 5.556164383561644e-07, "log_odds_chosen": 3.7746572494506836, "log_odds_ratio": -0.09216876327991486, "logits/chosen": 0.6376383304595947, "logits/rejected": 0.6135948896408081, "logps/chosen": -2.0259246826171875, "logps/rejected": -5.618945598602295, "loss": 0.7739, "nll_loss": 0.7646428346633911, "rewards/accuracies": 1.0, "rewards/chosen": -0.20259246230125427, "rewards/margins": 0.3593021035194397, "rewards/rejected": -0.5618945956230164, "step": 3244 }, { "epoch": 8.88432580424367, "grad_norm": 4.362943649291992, "learning_rate": 5.554794520547945e-07, "log_odds_chosen": 1.4822282791137695, "log_odds_ratio": -0.3625336289405823, "logits/chosen": 0.7761778235435486, "logits/rejected": 0.7222337126731873, "logps/chosen": -2.3029613494873047, "logps/rejected": -3.6423850059509277, "loss": 0.5778, "nll_loss": 0.5415318608283997, "rewards/accuracies": 0.875, "rewards/chosen": -0.23029613494873047, "rewards/margins": 0.1339423656463623, "rewards/rejected": -0.3642385005950928, "step": 3245 }, { "epoch": 8.8870636550308, "grad_norm": 3.404259443283081, "learning_rate": 5.553424657534246e-07, "log_odds_chosen": 2.699075937271118, "log_odds_ratio": -0.257112592458725, "logits/chosen": 0.9690984487533569, "logits/rejected": 0.9227226376533508, "logps/chosen": -2.202078342437744, "logps/rejected": -4.825882911682129, "loss": 0.742, "nll_loss": 0.7163015007972717, "rewards/accuracies": 0.875, "rewards/chosen": -0.2202078253030777, "rewards/margins": 0.2623804807662964, "rewards/rejected": -0.4825883209705353, "step": 3246 }, { "epoch": 8.889801505817934, "grad_norm": 3.3797130584716797, "learning_rate": 5.552054794520548e-07, "log_odds_chosen": 2.435093879699707, "log_odds_ratio": -0.2595338225364685, "logits/chosen": 0.86079341173172, "logits/rejected": 0.8592199087142944, "logps/chosen": -1.8558220863342285, "logps/rejected": -3.9875330924987793, "loss": 0.6384, "nll_loss": 0.6124516725540161, "rewards/accuracies": 0.875, "rewards/chosen": -0.1855822205543518, "rewards/margins": 0.2131711095571518, "rewards/rejected": -0.3987533152103424, "step": 3247 }, { "epoch": 8.892539356605065, "grad_norm": 3.5406394004821777, "learning_rate": 5.55068493150685e-07, "log_odds_chosen": 1.761937141418457, "log_odds_ratio": -0.3114668130874634, "logits/chosen": 0.8011136054992676, "logits/rejected": 0.747016966342926, "logps/chosen": -2.324509382247925, "logps/rejected": -4.011591911315918, "loss": 0.7269, "nll_loss": 0.6957303285598755, "rewards/accuracies": 0.875, "rewards/chosen": -0.2324509471654892, "rewards/margins": 0.1687082201242447, "rewards/rejected": -0.4011591672897339, "step": 3248 }, { "epoch": 8.895277207392198, "grad_norm": 3.262178659439087, "learning_rate": 5.54931506849315e-07, "log_odds_chosen": 3.1701760292053223, "log_odds_ratio": -0.16239818930625916, "logits/chosen": 0.8710507154464722, "logits/rejected": 0.845482587814331, "logps/chosen": -1.4486806392669678, "logps/rejected": -4.370507717132568, "loss": 0.6028, "nll_loss": 0.5865710973739624, "rewards/accuracies": 1.0, "rewards/chosen": -0.14486806094646454, "rewards/margins": 0.29218271374702454, "rewards/rejected": -0.43705078959465027, "step": 3249 }, { "epoch": 8.898015058179329, "grad_norm": 6.484628677368164, "learning_rate": 5.547945205479452e-07, "log_odds_chosen": 2.3954434394836426, "log_odds_ratio": -0.2549460530281067, "logits/chosen": 1.127166748046875, "logits/rejected": 1.0836246013641357, "logps/chosen": -1.9090962409973145, "logps/rejected": -4.15761661529541, "loss": 0.6626, "nll_loss": 0.637091338634491, "rewards/accuracies": 0.875, "rewards/chosen": -0.19090962409973145, "rewards/margins": 0.2248520702123642, "rewards/rejected": -0.41576170921325684, "step": 3250 }, { "epoch": 8.900752908966462, "grad_norm": 3.6560521125793457, "learning_rate": 5.546575342465754e-07, "log_odds_chosen": 2.6113839149475098, "log_odds_ratio": -0.20039598643779755, "logits/chosen": 0.9611477851867676, "logits/rejected": 1.0166008472442627, "logps/chosen": -2.211392402648926, "logps/rejected": -4.730384349822998, "loss": 0.7837, "nll_loss": 0.7636837959289551, "rewards/accuracies": 1.0, "rewards/chosen": -0.22113923728466034, "rewards/margins": 0.25189921259880066, "rewards/rejected": -0.4730384349822998, "step": 3251 }, { "epoch": 8.903490759753593, "grad_norm": 4.783920764923096, "learning_rate": 5.545205479452054e-07, "log_odds_chosen": 3.3442039489746094, "log_odds_ratio": -0.3276181221008301, "logits/chosen": 0.7681368589401245, "logits/rejected": 0.7745643854141235, "logps/chosen": -1.8241714239120483, "logps/rejected": -4.822903633117676, "loss": 0.6833, "nll_loss": 0.650524377822876, "rewards/accuracies": 0.75, "rewards/chosen": -0.1824171543121338, "rewards/margins": 0.2998732328414917, "rewards/rejected": -0.4822903275489807, "step": 3252 }, { "epoch": 8.906228610540726, "grad_norm": 5.348326206207275, "learning_rate": 5.543835616438356e-07, "log_odds_chosen": 2.6005074977874756, "log_odds_ratio": -0.3066163659095764, "logits/chosen": 0.9256637096405029, "logits/rejected": 0.9319165945053101, "logps/chosen": -2.4179019927978516, "logps/rejected": -4.880792140960693, "loss": 0.6781, "nll_loss": 0.6474085450172424, "rewards/accuracies": 0.875, "rewards/chosen": -0.24179020524024963, "rewards/margins": 0.24628901481628418, "rewards/rejected": -0.4880792498588562, "step": 3253 }, { "epoch": 8.908966461327857, "grad_norm": 3.0396008491516113, "learning_rate": 5.542465753424658e-07, "log_odds_chosen": 2.510368824005127, "log_odds_ratio": -0.22348831593990326, "logits/chosen": 1.0076110363006592, "logits/rejected": 0.9906909465789795, "logps/chosen": -1.826350450515747, "logps/rejected": -4.130756378173828, "loss": 0.5537, "nll_loss": 0.5313210487365723, "rewards/accuracies": 1.0, "rewards/chosen": -0.18263503909111023, "rewards/margins": 0.23044061660766602, "rewards/rejected": -0.41307562589645386, "step": 3254 }, { "epoch": 8.91170431211499, "grad_norm": 3.275421142578125, "learning_rate": 5.541095890410959e-07, "log_odds_chosen": 3.706968307495117, "log_odds_ratio": -0.13094031810760498, "logits/chosen": 0.8292167782783508, "logits/rejected": 0.7952598333358765, "logps/chosen": -1.7428926229476929, "logps/rejected": -5.244775772094727, "loss": 0.562, "nll_loss": 0.5488935708999634, "rewards/accuracies": 1.0, "rewards/chosen": -0.1742892563343048, "rewards/margins": 0.350188285112381, "rewards/rejected": -0.5244775414466858, "step": 3255 }, { "epoch": 8.914442162902121, "grad_norm": 3.6596591472625732, "learning_rate": 5.53972602739726e-07, "log_odds_chosen": 2.524588108062744, "log_odds_ratio": -0.33252575993537903, "logits/chosen": 0.6535769701004028, "logits/rejected": 0.6364760398864746, "logps/chosen": -1.772993564605713, "logps/rejected": -4.188980579376221, "loss": 0.5593, "nll_loss": 0.5260039567947388, "rewards/accuracies": 0.875, "rewards/chosen": -0.1772993803024292, "rewards/margins": 0.2415987253189087, "rewards/rejected": -0.4188980758190155, "step": 3256 }, { "epoch": 8.917180013689254, "grad_norm": 3.076214075088501, "learning_rate": 5.538356164383562e-07, "log_odds_chosen": 1.6895954608917236, "log_odds_ratio": -0.24162934720516205, "logits/chosen": 0.6926356554031372, "logits/rejected": 0.5654133558273315, "logps/chosen": -1.7566649913787842, "logps/rejected": -3.2861015796661377, "loss": 0.6401, "nll_loss": 0.6159036159515381, "rewards/accuracies": 1.0, "rewards/chosen": -0.17566651105880737, "rewards/margins": 0.1529436707496643, "rewards/rejected": -0.3286101818084717, "step": 3257 }, { "epoch": 8.919917864476385, "grad_norm": 4.107439994812012, "learning_rate": 5.536986301369863e-07, "log_odds_chosen": 3.203519105911255, "log_odds_ratio": -0.2721491754055023, "logits/chosen": 0.7872152328491211, "logits/rejected": 0.815834641456604, "logps/chosen": -1.5574977397918701, "logps/rejected": -4.579474449157715, "loss": 0.7056, "nll_loss": 0.6783560514450073, "rewards/accuracies": 1.0, "rewards/chosen": -0.15574978291988373, "rewards/margins": 0.3021976351737976, "rewards/rejected": -0.45794740319252014, "step": 3258 }, { "epoch": 8.922655715263518, "grad_norm": 4.310036659240723, "learning_rate": 5.535616438356164e-07, "log_odds_chosen": 1.8157110214233398, "log_odds_ratio": -0.31174102425575256, "logits/chosen": 1.1617375612258911, "logits/rejected": 1.1785879135131836, "logps/chosen": -2.367847204208374, "logps/rejected": -3.9847488403320312, "loss": 0.5926, "nll_loss": 0.5613928437232971, "rewards/accuracies": 1.0, "rewards/chosen": -0.23678472638130188, "rewards/margins": 0.16169017553329468, "rewards/rejected": -0.39847487211227417, "step": 3259 }, { "epoch": 8.92539356605065, "grad_norm": 3.477180004119873, "learning_rate": 5.534246575342465e-07, "log_odds_chosen": 3.57511043548584, "log_odds_ratio": -0.12285371869802475, "logits/chosen": 1.007286548614502, "logits/rejected": 1.0271965265274048, "logps/chosen": -2.205786943435669, "logps/rejected": -5.635064125061035, "loss": 0.5604, "nll_loss": 0.5481001138687134, "rewards/accuracies": 1.0, "rewards/chosen": -0.22057870030403137, "rewards/margins": 0.3429276943206787, "rewards/rejected": -0.5635063648223877, "step": 3260 }, { "epoch": 8.928131416837783, "grad_norm": 2.6812222003936768, "learning_rate": 5.532876712328767e-07, "log_odds_chosen": 3.756775379180908, "log_odds_ratio": -0.15538853406906128, "logits/chosen": 0.9685888886451721, "logits/rejected": 0.8641098141670227, "logps/chosen": -1.6655815839767456, "logps/rejected": -5.169924736022949, "loss": 0.5922, "nll_loss": 0.5767089128494263, "rewards/accuracies": 1.0, "rewards/chosen": -0.1665581613779068, "rewards/margins": 0.3504343330860138, "rewards/rejected": -0.516992449760437, "step": 3261 }, { "epoch": 8.930869267624914, "grad_norm": 3.368699312210083, "learning_rate": 5.531506849315069e-07, "log_odds_chosen": 4.116513252258301, "log_odds_ratio": -0.09121930599212646, "logits/chosen": 0.8790525794029236, "logits/rejected": 0.8686567544937134, "logps/chosen": -1.9876673221588135, "logps/rejected": -5.914788246154785, "loss": 0.6325, "nll_loss": 0.6233879327774048, "rewards/accuracies": 1.0, "rewards/chosen": -0.19876675307750702, "rewards/margins": 0.3927120864391327, "rewards/rejected": -0.5914787650108337, "step": 3262 }, { "epoch": 8.933607118412047, "grad_norm": 5.906033992767334, "learning_rate": 5.530136986301369e-07, "log_odds_chosen": 2.2702391147613525, "log_odds_ratio": -0.39500272274017334, "logits/chosen": 0.7793389558792114, "logits/rejected": 0.7721129655838013, "logps/chosen": -2.29530668258667, "logps/rejected": -4.408147811889648, "loss": 0.6596, "nll_loss": 0.6201143264770508, "rewards/accuracies": 0.875, "rewards/chosen": -0.2295306921005249, "rewards/margins": 0.21128413081169128, "rewards/rejected": -0.4408147931098938, "step": 3263 }, { "epoch": 8.936344969199178, "grad_norm": 2.9332077503204346, "learning_rate": 5.528767123287671e-07, "log_odds_chosen": 3.5321521759033203, "log_odds_ratio": -0.2797883450984955, "logits/chosen": 0.9390155673027039, "logits/rejected": 0.9747991561889648, "logps/chosen": -2.09787654876709, "logps/rejected": -5.481425762176514, "loss": 0.6261, "nll_loss": 0.5981051325798035, "rewards/accuracies": 0.875, "rewards/chosen": -0.20978766679763794, "rewards/margins": 0.3383549153804779, "rewards/rejected": -0.5481425523757935, "step": 3264 }, { "epoch": 8.93908281998631, "grad_norm": 3.3749654293060303, "learning_rate": 5.527397260273973e-07, "log_odds_chosen": 2.2473087310791016, "log_odds_ratio": -0.17465727031230927, "logits/chosen": 0.9587879776954651, "logits/rejected": 0.9159338474273682, "logps/chosen": -1.6832406520843506, "logps/rejected": -3.75311017036438, "loss": 0.5685, "nll_loss": 0.5510357618331909, "rewards/accuracies": 1.0, "rewards/chosen": -0.1683240830898285, "rewards/margins": 0.2069869488477707, "rewards/rejected": -0.375311017036438, "step": 3265 }, { "epoch": 8.941820670773442, "grad_norm": 3.634787082672119, "learning_rate": 5.526027397260273e-07, "log_odds_chosen": 1.9663394689559937, "log_odds_ratio": -0.3760358393192291, "logits/chosen": 1.0985665321350098, "logits/rejected": 1.1322362422943115, "logps/chosen": -1.779627799987793, "logps/rejected": -3.5075697898864746, "loss": 0.6009, "nll_loss": 0.5633237361907959, "rewards/accuracies": 0.75, "rewards/chosen": -0.1779627799987793, "rewards/margins": 0.1727941930294037, "rewards/rejected": -0.350756973028183, "step": 3266 }, { "epoch": 8.944558521560575, "grad_norm": 3.451011896133423, "learning_rate": 5.524657534246575e-07, "log_odds_chosen": 2.229901075363159, "log_odds_ratio": -0.21399782598018646, "logits/chosen": 0.6679060459136963, "logits/rejected": 0.6485560536384583, "logps/chosen": -1.8265341520309448, "logps/rejected": -3.9287562370300293, "loss": 0.6166, "nll_loss": 0.5951866507530212, "rewards/accuracies": 1.0, "rewards/chosen": -0.18265341222286224, "rewards/margins": 0.2102222442626953, "rewards/rejected": -0.39287564158439636, "step": 3267 }, { "epoch": 8.947296372347708, "grad_norm": 5.575995445251465, "learning_rate": 5.523287671232877e-07, "log_odds_chosen": 1.8680737018585205, "log_odds_ratio": -0.3853139877319336, "logits/chosen": 0.8969045877456665, "logits/rejected": 0.9195798635482788, "logps/chosen": -2.639599323272705, "logps/rejected": -4.336444854736328, "loss": 0.7205, "nll_loss": 0.6820075511932373, "rewards/accuracies": 0.875, "rewards/chosen": -0.26395997405052185, "rewards/margins": 0.1696845442056656, "rewards/rejected": -0.43364450335502625, "step": 3268 }, { "epoch": 8.950034223134839, "grad_norm": 7.2404985427856445, "learning_rate": 5.521917808219177e-07, "log_odds_chosen": 2.0695273876190186, "log_odds_ratio": -0.2874738872051239, "logits/chosen": 0.9418182373046875, "logits/rejected": 0.9082785844802856, "logps/chosen": -2.0473649501800537, "logps/rejected": -4.013734340667725, "loss": 0.597, "nll_loss": 0.5682111382484436, "rewards/accuracies": 0.875, "rewards/chosen": -0.20473648607730865, "rewards/margins": 0.19663694500923157, "rewards/rejected": -0.4013734459877014, "step": 3269 }, { "epoch": 8.952772073921972, "grad_norm": 3.2467737197875977, "learning_rate": 5.520547945205479e-07, "log_odds_chosen": 2.81272029876709, "log_odds_ratio": -0.1789313554763794, "logits/chosen": 0.8992166519165039, "logits/rejected": 0.8572995066642761, "logps/chosen": -1.8886370658874512, "logps/rejected": -4.563541412353516, "loss": 0.5991, "nll_loss": 0.5811610817909241, "rewards/accuracies": 0.875, "rewards/chosen": -0.18886370956897736, "rewards/margins": 0.2674903869628906, "rewards/rejected": -0.4563541114330292, "step": 3270 }, { "epoch": 8.955509924709103, "grad_norm": 4.010963439941406, "learning_rate": 5.519178082191781e-07, "log_odds_chosen": 0.7207579612731934, "log_odds_ratio": -0.7546179294586182, "logits/chosen": 0.9336405396461487, "logits/rejected": 0.9246275424957275, "logps/chosen": -2.4397411346435547, "logps/rejected": -3.094172954559326, "loss": 0.7049, "nll_loss": 0.6294453740119934, "rewards/accuracies": 0.75, "rewards/chosen": -0.24397411942481995, "rewards/margins": 0.06544318795204163, "rewards/rejected": -0.3094173073768616, "step": 3271 }, { "epoch": 8.958247775496236, "grad_norm": 3.8007123470306396, "learning_rate": 5.517808219178082e-07, "log_odds_chosen": 2.4187564849853516, "log_odds_ratio": -0.26685068011283875, "logits/chosen": 0.9050971865653992, "logits/rejected": 0.8100302815437317, "logps/chosen": -2.0153262615203857, "logps/rejected": -4.354597568511963, "loss": 0.6712, "nll_loss": 0.6445366740226746, "rewards/accuracies": 0.75, "rewards/chosen": -0.20153263211250305, "rewards/margins": 0.2339271456003189, "rewards/rejected": -0.43545976281166077, "step": 3272 }, { "epoch": 8.960985626283367, "grad_norm": 2.9468815326690674, "learning_rate": 5.516438356164383e-07, "log_odds_chosen": 2.3417742252349854, "log_odds_ratio": -0.1620556116104126, "logits/chosen": 0.944057047367096, "logits/rejected": 0.8690147995948792, "logps/chosen": -1.4538280963897705, "logps/rejected": -3.5287270545959473, "loss": 0.5952, "nll_loss": 0.5789912939071655, "rewards/accuracies": 1.0, "rewards/chosen": -0.145382821559906, "rewards/margins": 0.20748987793922424, "rewards/rejected": -0.35287269949913025, "step": 3273 }, { "epoch": 8.9637234770705, "grad_norm": 3.373135566711426, "learning_rate": 5.515068493150685e-07, "log_odds_chosen": 3.746955156326294, "log_odds_ratio": -0.09144672751426697, "logits/chosen": 0.7838786244392395, "logits/rejected": 0.786190390586853, "logps/chosen": -1.657423973083496, "logps/rejected": -5.058199882507324, "loss": 0.6011, "nll_loss": 0.5919978022575378, "rewards/accuracies": 1.0, "rewards/chosen": -0.16574238240718842, "rewards/margins": 0.34007757902145386, "rewards/rejected": -0.5058200359344482, "step": 3274 }, { "epoch": 8.966461327857632, "grad_norm": 5.490053176879883, "learning_rate": 5.513698630136986e-07, "log_odds_chosen": 2.535991907119751, "log_odds_ratio": -0.4539839029312134, "logits/chosen": 0.9842501282691956, "logits/rejected": 1.0151993036270142, "logps/chosen": -1.9042625427246094, "logps/rejected": -4.363916397094727, "loss": 0.7103, "nll_loss": 0.6648753881454468, "rewards/accuracies": 0.875, "rewards/chosen": -0.19042626023292542, "rewards/margins": 0.245965376496315, "rewards/rejected": -0.4363916516304016, "step": 3275 }, { "epoch": 8.969199178644764, "grad_norm": 3.2918756008148193, "learning_rate": 5.512328767123287e-07, "log_odds_chosen": 2.908026695251465, "log_odds_ratio": -0.1834104210138321, "logits/chosen": 0.9274964332580566, "logits/rejected": 0.8891459703445435, "logps/chosen": -1.730055809020996, "logps/rejected": -4.450107097625732, "loss": 0.676, "nll_loss": 0.6576690673828125, "rewards/accuracies": 1.0, "rewards/chosen": -0.1730055809020996, "rewards/margins": 0.2720051407814026, "rewards/rejected": -0.4450107514858246, "step": 3276 }, { "epoch": 8.971937029431896, "grad_norm": 3.7966763973236084, "learning_rate": 5.510958904109588e-07, "log_odds_chosen": 1.122344732284546, "log_odds_ratio": -0.4177401661872864, "logits/chosen": 0.7492758631706238, "logits/rejected": 0.6731529235839844, "logps/chosen": -2.4497642517089844, "logps/rejected": -3.459928035736084, "loss": 0.6694, "nll_loss": 0.6276493072509766, "rewards/accuracies": 0.875, "rewards/chosen": -0.24497641623020172, "rewards/margins": 0.10101638734340668, "rewards/rejected": -0.3459928035736084, "step": 3277 }, { "epoch": 8.974674880219029, "grad_norm": 3.9464147090911865, "learning_rate": 5.50958904109589e-07, "log_odds_chosen": 3.963732957839966, "log_odds_ratio": -0.11947152763605118, "logits/chosen": 1.109029769897461, "logits/rejected": 1.128791332244873, "logps/chosen": -2.4665989875793457, "logps/rejected": -6.195593357086182, "loss": 0.6763, "nll_loss": 0.6643283367156982, "rewards/accuracies": 1.0, "rewards/chosen": -0.24665991961956024, "rewards/margins": 0.3728994131088257, "rewards/rejected": -0.6195592880249023, "step": 3278 }, { "epoch": 8.97741273100616, "grad_norm": 3.5905637741088867, "learning_rate": 5.508219178082192e-07, "log_odds_chosen": 2.7932090759277344, "log_odds_ratio": -0.13880428671836853, "logits/chosen": 0.8724574446678162, "logits/rejected": 0.8522638082504272, "logps/chosen": -1.6360206604003906, "logps/rejected": -4.184149742126465, "loss": 0.6022, "nll_loss": 0.5882753133773804, "rewards/accuracies": 1.0, "rewards/chosen": -0.1636020690202713, "rewards/margins": 0.25481292605400085, "rewards/rejected": -0.4184149503707886, "step": 3279 }, { "epoch": 8.980150581793293, "grad_norm": 3.1968271732330322, "learning_rate": 5.506849315068492e-07, "log_odds_chosen": 1.730982780456543, "log_odds_ratio": -0.25953346490859985, "logits/chosen": 0.734169602394104, "logits/rejected": 0.7662707567214966, "logps/chosen": -1.6064670085906982, "logps/rejected": -3.181811809539795, "loss": 0.5683, "nll_loss": 0.5423235893249512, "rewards/accuracies": 1.0, "rewards/chosen": -0.1606467068195343, "rewards/margins": 0.15753448009490967, "rewards/rejected": -0.3181811571121216, "step": 3280 }, { "epoch": 8.982888432580424, "grad_norm": 4.221642017364502, "learning_rate": 5.505479452054794e-07, "log_odds_chosen": 2.400745391845703, "log_odds_ratio": -0.3152925968170166, "logits/chosen": 1.0983527898788452, "logits/rejected": 1.139600157737732, "logps/chosen": -3.045055866241455, "logps/rejected": -5.339382171630859, "loss": 0.6352, "nll_loss": 0.603657603263855, "rewards/accuracies": 0.875, "rewards/chosen": -0.3045055866241455, "rewards/margins": 0.229432612657547, "rewards/rejected": -0.5339382290840149, "step": 3281 }, { "epoch": 8.985626283367557, "grad_norm": 4.040502071380615, "learning_rate": 5.504109589041096e-07, "log_odds_chosen": 1.8169173002243042, "log_odds_ratio": -0.26345157623291016, "logits/chosen": 1.192427396774292, "logits/rejected": 1.1868069171905518, "logps/chosen": -2.1402676105499268, "logps/rejected": -3.7911925315856934, "loss": 0.623, "nll_loss": 0.5966259241104126, "rewards/accuracies": 0.875, "rewards/chosen": -0.2140267789363861, "rewards/margins": 0.16509246826171875, "rewards/rejected": -0.37911927700042725, "step": 3282 }, { "epoch": 8.988364134154688, "grad_norm": 3.0346505641937256, "learning_rate": 5.502739726027396e-07, "log_odds_chosen": 2.8166329860687256, "log_odds_ratio": -0.14476671814918518, "logits/chosen": 0.9235746264457703, "logits/rejected": 0.9353259205818176, "logps/chosen": -2.467883586883545, "logps/rejected": -5.184935569763184, "loss": 0.6096, "nll_loss": 0.5951623916625977, "rewards/accuracies": 1.0, "rewards/chosen": -0.24678833782672882, "rewards/margins": 0.2717052400112152, "rewards/rejected": -0.5184935927391052, "step": 3283 }, { "epoch": 8.991101984941821, "grad_norm": 4.042234897613525, "learning_rate": 5.501369863013698e-07, "log_odds_chosen": 4.190007209777832, "log_odds_ratio": -0.17354173958301544, "logits/chosen": 0.9615992307662964, "logits/rejected": 1.0041453838348389, "logps/chosen": -2.4492568969726562, "logps/rejected": -6.5642828941345215, "loss": 0.7345, "nll_loss": 0.7171364426612854, "rewards/accuracies": 1.0, "rewards/chosen": -0.24492570757865906, "rewards/margins": 0.41150254011154175, "rewards/rejected": -0.6564282774925232, "step": 3284 }, { "epoch": 8.993839835728952, "grad_norm": 4.657193660736084, "learning_rate": 5.5e-07, "log_odds_chosen": 2.2952880859375, "log_odds_ratio": -0.4843292236328125, "logits/chosen": 1.037212610244751, "logits/rejected": 0.9792162775993347, "logps/chosen": -2.068295955657959, "logps/rejected": -4.20566987991333, "loss": 0.6401, "nll_loss": 0.5916192531585693, "rewards/accuracies": 0.875, "rewards/chosen": -0.20682960748672485, "rewards/margins": 0.2137373834848404, "rewards/rejected": -0.42056700587272644, "step": 3285 }, { "epoch": 8.996577686516085, "grad_norm": 3.4562129974365234, "learning_rate": 5.498630136986301e-07, "log_odds_chosen": 1.7776978015899658, "log_odds_ratio": -0.2171611189842224, "logits/chosen": 0.9012591242790222, "logits/rejected": 0.9392482042312622, "logps/chosen": -1.6803319454193115, "logps/rejected": -3.307629346847534, "loss": 0.5951, "nll_loss": 0.57342129945755, "rewards/accuracies": 1.0, "rewards/chosen": -0.1680331826210022, "rewards/margins": 0.16272974014282227, "rewards/rejected": -0.33076292276382446, "step": 3286 }, { "epoch": 8.999315537303216, "grad_norm": 3.379223346710205, "learning_rate": 5.497260273972602e-07, "log_odds_chosen": 2.977860450744629, "log_odds_ratio": -0.1598949432373047, "logits/chosen": 0.8783883452415466, "logits/rejected": 0.9178945422172546, "logps/chosen": -2.1433260440826416, "logps/rejected": -4.971848011016846, "loss": 0.7708, "nll_loss": 0.7548536062240601, "rewards/accuracies": 1.0, "rewards/chosen": -0.21433261036872864, "rewards/margins": 0.2828522026538849, "rewards/rejected": -0.4971848130226135, "step": 3287 }, { "epoch": 9.00205338809035, "grad_norm": 3.4619569778442383, "learning_rate": 5.495890410958904e-07, "log_odds_chosen": 2.271772861480713, "log_odds_ratio": -0.27939891815185547, "logits/chosen": 1.057350993156433, "logits/rejected": 1.0173311233520508, "logps/chosen": -2.287397861480713, "logps/rejected": -4.507411003112793, "loss": 0.649, "nll_loss": 0.6210434436798096, "rewards/accuracies": 0.875, "rewards/chosen": -0.22873982787132263, "rewards/margins": 0.22200125455856323, "rewards/rejected": -0.45074108242988586, "step": 3288 }, { "epoch": 9.00479123887748, "grad_norm": 3.322700262069702, "learning_rate": 5.494520547945205e-07, "log_odds_chosen": 2.2947998046875, "log_odds_ratio": -0.21234218776226044, "logits/chosen": 1.2059030532836914, "logits/rejected": 1.1952502727508545, "logps/chosen": -2.207373857498169, "logps/rejected": -4.375057697296143, "loss": 0.5501, "nll_loss": 0.5288269519805908, "rewards/accuracies": 1.0, "rewards/chosen": -0.22073739767074585, "rewards/margins": 0.21676838397979736, "rewards/rejected": -0.4375057816505432, "step": 3289 }, { "epoch": 9.007529089664613, "grad_norm": 5.3799004554748535, "learning_rate": 5.493150684931506e-07, "log_odds_chosen": 3.7126059532165527, "log_odds_ratio": -0.31010162830352783, "logits/chosen": 0.9582251310348511, "logits/rejected": 0.9663557410240173, "logps/chosen": -2.404226064682007, "logps/rejected": -5.999144554138184, "loss": 0.6915, "nll_loss": 0.6604454517364502, "rewards/accuracies": 0.875, "rewards/chosen": -0.24042260646820068, "rewards/margins": 0.35949182510375977, "rewards/rejected": -0.5999144315719604, "step": 3290 }, { "epoch": 9.010266940451745, "grad_norm": 3.7626705169677734, "learning_rate": 5.491780821917807e-07, "log_odds_chosen": 1.2539234161376953, "log_odds_ratio": -0.3481355905532837, "logits/chosen": 0.9829789400100708, "logits/rejected": 0.9334467053413391, "logps/chosen": -1.741557002067566, "logps/rejected": -2.8254261016845703, "loss": 0.59, "nll_loss": 0.5552084445953369, "rewards/accuracies": 0.875, "rewards/chosen": -0.17415569722652435, "rewards/margins": 0.10838691145181656, "rewards/rejected": -0.2825425863265991, "step": 3291 }, { "epoch": 9.013004791238878, "grad_norm": 3.2145473957061768, "learning_rate": 5.490410958904109e-07, "log_odds_chosen": 1.185471773147583, "log_odds_ratio": -0.38255253434181213, "logits/chosen": 0.9009178280830383, "logits/rejected": 0.7969266176223755, "logps/chosen": -1.2776732444763184, "logps/rejected": -2.27243971824646, "loss": 0.6007, "nll_loss": 0.5624925494194031, "rewards/accuracies": 0.875, "rewards/chosen": -0.12776732444763184, "rewards/margins": 0.0994766503572464, "rewards/rejected": -0.22724398970603943, "step": 3292 }, { "epoch": 9.015742642026009, "grad_norm": 2.989846706390381, "learning_rate": 5.489041095890411e-07, "log_odds_chosen": 3.531254291534424, "log_odds_ratio": -0.24305056035518646, "logits/chosen": 0.6919940710067749, "logits/rejected": 0.6607356071472168, "logps/chosen": -1.8186166286468506, "logps/rejected": -5.197864532470703, "loss": 0.6147, "nll_loss": 0.5904355049133301, "rewards/accuracies": 1.0, "rewards/chosen": -0.18186166882514954, "rewards/margins": 0.3379247784614563, "rewards/rejected": -0.5197864770889282, "step": 3293 }, { "epoch": 9.018480492813142, "grad_norm": 3.110940933227539, "learning_rate": 5.487671232876711e-07, "log_odds_chosen": 3.273615598678589, "log_odds_ratio": -0.23413404822349548, "logits/chosen": 0.9830706119537354, "logits/rejected": 1.0101054906845093, "logps/chosen": -2.1781322956085205, "logps/rejected": -5.26724100112915, "loss": 0.6676, "nll_loss": 0.6441444754600525, "rewards/accuracies": 0.75, "rewards/chosen": -0.21781325340270996, "rewards/margins": 0.30891087651252747, "rewards/rejected": -0.526724100112915, "step": 3294 }, { "epoch": 9.021218343600275, "grad_norm": 5.312326431274414, "learning_rate": 5.486301369863013e-07, "log_odds_chosen": 1.4644709825515747, "log_odds_ratio": -0.5595138669013977, "logits/chosen": 0.8018524646759033, "logits/rejected": 0.8957030773162842, "logps/chosen": -3.348848342895508, "logps/rejected": -4.733356475830078, "loss": 0.8496, "nll_loss": 0.7936655879020691, "rewards/accuracies": 0.875, "rewards/chosen": -0.3348848521709442, "rewards/margins": 0.13845083117485046, "rewards/rejected": -0.4733356833457947, "step": 3295 }, { "epoch": 9.023956194387406, "grad_norm": 3.12833309173584, "learning_rate": 5.484931506849315e-07, "log_odds_chosen": 1.2278263568878174, "log_odds_ratio": -0.3626345098018646, "logits/chosen": 0.8988773822784424, "logits/rejected": 0.855292558670044, "logps/chosen": -1.5732117891311646, "logps/rejected": -2.663076877593994, "loss": 0.6144, "nll_loss": 0.5781855583190918, "rewards/accuracies": 0.875, "rewards/chosen": -0.15732118487358093, "rewards/margins": 0.108986496925354, "rewards/rejected": -0.26630768179893494, "step": 3296 }, { "epoch": 9.026694045174539, "grad_norm": 3.808530569076538, "learning_rate": 5.483561643835615e-07, "log_odds_chosen": 2.5151522159576416, "log_odds_ratio": -0.21923401951789856, "logits/chosen": 0.9562757015228271, "logits/rejected": 1.0317562818527222, "logps/chosen": -2.640591859817505, "logps/rejected": -4.993772506713867, "loss": 0.6677, "nll_loss": 0.6457830667495728, "rewards/accuracies": 1.0, "rewards/chosen": -0.2640591859817505, "rewards/margins": 0.23531809449195862, "rewards/rejected": -0.4993772804737091, "step": 3297 }, { "epoch": 9.02943189596167, "grad_norm": 3.692518472671509, "learning_rate": 5.482191780821917e-07, "log_odds_chosen": 0.8776509165763855, "log_odds_ratio": -0.41599956154823303, "logits/chosen": 1.0408507585525513, "logits/rejected": 1.0142005681991577, "logps/chosen": -1.9329168796539307, "logps/rejected": -2.709477424621582, "loss": 0.6277, "nll_loss": 0.5861337780952454, "rewards/accuracies": 1.0, "rewards/chosen": -0.19329170882701874, "rewards/margins": 0.07765603065490723, "rewards/rejected": -0.27094772458076477, "step": 3298 }, { "epoch": 9.032169746748803, "grad_norm": 3.6831743717193604, "learning_rate": 5.480821917808219e-07, "log_odds_chosen": 3.6124424934387207, "log_odds_ratio": -0.10372400283813477, "logits/chosen": 0.9657317996025085, "logits/rejected": 1.0074483156204224, "logps/chosen": -2.570672035217285, "logps/rejected": -6.022815704345703, "loss": 0.6927, "nll_loss": 0.6822913885116577, "rewards/accuracies": 1.0, "rewards/chosen": -0.2570672035217285, "rewards/margins": 0.3452144265174866, "rewards/rejected": -0.6022816300392151, "step": 3299 }, { "epoch": 9.034907597535934, "grad_norm": 4.943498134613037, "learning_rate": 5.47945205479452e-07, "log_odds_chosen": 0.702384889125824, "log_odds_ratio": -0.45553869009017944, "logits/chosen": 0.7264975309371948, "logits/rejected": 0.6346499919891357, "logps/chosen": -2.0495991706848145, "logps/rejected": -2.6310887336730957, "loss": 0.689, "nll_loss": 0.6434161067008972, "rewards/accuracies": 0.875, "rewards/chosen": -0.2049599289894104, "rewards/margins": 0.05814895033836365, "rewards/rejected": -0.26310890913009644, "step": 3300 }, { "epoch": 9.037645448323067, "grad_norm": 3.0952558517456055, "learning_rate": 5.478082191780821e-07, "log_odds_chosen": 1.621904730796814, "log_odds_ratio": -0.2460833042860031, "logits/chosen": 0.5194905400276184, "logits/rejected": 0.47757017612457275, "logps/chosen": -1.6370935440063477, "logps/rejected": -3.091637134552002, "loss": 0.5613, "nll_loss": 0.5367077589035034, "rewards/accuracies": 1.0, "rewards/chosen": -0.1637093424797058, "rewards/margins": 0.14545437693595886, "rewards/rejected": -0.3091637194156647, "step": 3301 }, { "epoch": 9.040383299110198, "grad_norm": 3.738832712173462, "learning_rate": 5.476712328767123e-07, "log_odds_chosen": 1.0506279468536377, "log_odds_ratio": -0.4231279492378235, "logits/chosen": 0.8466811776161194, "logits/rejected": 0.8175270557403564, "logps/chosen": -1.7927557229995728, "logps/rejected": -2.7423226833343506, "loss": 0.6135, "nll_loss": 0.5712120532989502, "rewards/accuracies": 0.75, "rewards/chosen": -0.17927557229995728, "rewards/margins": 0.09495669603347778, "rewards/rejected": -0.27423226833343506, "step": 3302 }, { "epoch": 9.043121149897331, "grad_norm": 8.440866470336914, "learning_rate": 5.475342465753424e-07, "log_odds_chosen": 1.618691325187683, "log_odds_ratio": -0.26391905546188354, "logits/chosen": 1.0134509801864624, "logits/rejected": 0.928867757320404, "logps/chosen": -1.8889516592025757, "logps/rejected": -3.285052537918091, "loss": 0.6103, "nll_loss": 0.5839471817016602, "rewards/accuracies": 1.0, "rewards/chosen": -0.18889516592025757, "rewards/margins": 0.13961009681224823, "rewards/rejected": -0.3285052478313446, "step": 3303 }, { "epoch": 9.045859000684462, "grad_norm": 3.4886879920959473, "learning_rate": 5.473972602739725e-07, "log_odds_chosen": 2.988795518875122, "log_odds_ratio": -0.21549341082572937, "logits/chosen": 1.0347952842712402, "logits/rejected": 1.0546678304672241, "logps/chosen": -2.066683769226074, "logps/rejected": -4.921846866607666, "loss": 0.7526, "nll_loss": 0.7310692071914673, "rewards/accuracies": 0.875, "rewards/chosen": -0.20666839182376862, "rewards/margins": 0.28551626205444336, "rewards/rejected": -0.49218466877937317, "step": 3304 }, { "epoch": 9.048596851471595, "grad_norm": 5.377261638641357, "learning_rate": 5.472602739726026e-07, "log_odds_chosen": 1.352445125579834, "log_odds_ratio": -0.3369162678718567, "logits/chosen": 0.8182264566421509, "logits/rejected": 0.8035029768943787, "logps/chosen": -2.635857343673706, "logps/rejected": -3.859314203262329, "loss": 0.6404, "nll_loss": 0.6067144870758057, "rewards/accuracies": 0.875, "rewards/chosen": -0.26358574628829956, "rewards/margins": 0.1223456934094429, "rewards/rejected": -0.38593143224716187, "step": 3305 }, { "epoch": 9.051334702258726, "grad_norm": 3.8848769664764404, "learning_rate": 5.471232876712329e-07, "log_odds_chosen": 1.419472098350525, "log_odds_ratio": -0.3334098756313324, "logits/chosen": 0.5415597558021545, "logits/rejected": 0.47444695234298706, "logps/chosen": -2.129391670227051, "logps/rejected": -3.4404406547546387, "loss": 0.6703, "nll_loss": 0.63694828748703, "rewards/accuracies": 0.75, "rewards/chosen": -0.21293917298316956, "rewards/margins": 0.13110490143299103, "rewards/rejected": -0.3440440893173218, "step": 3306 }, { "epoch": 9.05407255304586, "grad_norm": 3.4283127784729004, "learning_rate": 5.469863013698631e-07, "log_odds_chosen": 2.1174240112304688, "log_odds_ratio": -0.29715868830680847, "logits/chosen": 0.901771068572998, "logits/rejected": 0.9136998653411865, "logps/chosen": -2.060366153717041, "logps/rejected": -4.0593037605285645, "loss": 0.7201, "nll_loss": 0.6903847455978394, "rewards/accuracies": 0.875, "rewards/chosen": -0.20603662729263306, "rewards/margins": 0.1998937726020813, "rewards/rejected": -0.40593039989471436, "step": 3307 }, { "epoch": 9.05681040383299, "grad_norm": 3.165708303451538, "learning_rate": 5.46849315068493e-07, "log_odds_chosen": 1.9204046726226807, "log_odds_ratio": -0.22698074579238892, "logits/chosen": 0.8547518849372864, "logits/rejected": 0.768807053565979, "logps/chosen": -1.51983642578125, "logps/rejected": -3.235990047454834, "loss": 0.66, "nll_loss": 0.6372546553611755, "rewards/accuracies": 1.0, "rewards/chosen": -0.15198364853858948, "rewards/margins": 0.1716153621673584, "rewards/rejected": -0.3235990107059479, "step": 3308 }, { "epoch": 9.059548254620124, "grad_norm": 4.450433731079102, "learning_rate": 5.467123287671233e-07, "log_odds_chosen": 2.1392602920532227, "log_odds_ratio": -0.3416941165924072, "logits/chosen": 0.9832192063331604, "logits/rejected": 1.041759729385376, "logps/chosen": -2.5855119228363037, "logps/rejected": -4.496830463409424, "loss": 0.6491, "nll_loss": 0.6149605512619019, "rewards/accuracies": 0.875, "rewards/chosen": -0.2585511803627014, "rewards/margins": 0.1911318600177765, "rewards/rejected": -0.4496830403804779, "step": 3309 }, { "epoch": 9.062286105407255, "grad_norm": 3.052988052368164, "learning_rate": 5.465753424657535e-07, "log_odds_chosen": 3.9508752822875977, "log_odds_ratio": -0.11196201294660568, "logits/chosen": 0.6768978834152222, "logits/rejected": 0.6688281297683716, "logps/chosen": -1.8348506689071655, "logps/rejected": -5.5634965896606445, "loss": 0.5632, "nll_loss": 0.5519681572914124, "rewards/accuracies": 1.0, "rewards/chosen": -0.18348506093025208, "rewards/margins": 0.3728645443916321, "rewards/rejected": -0.5563496351242065, "step": 3310 }, { "epoch": 9.065023956194388, "grad_norm": 3.1820645332336426, "learning_rate": 5.464383561643835e-07, "log_odds_chosen": 3.664027214050293, "log_odds_ratio": -0.12115712463855743, "logits/chosen": 0.8853623270988464, "logits/rejected": 0.77829909324646, "logps/chosen": -1.7545740604400635, "logps/rejected": -5.150690078735352, "loss": 0.6601, "nll_loss": 0.6479373574256897, "rewards/accuracies": 1.0, "rewards/chosen": -0.1754574179649353, "rewards/margins": 0.33961161971092224, "rewards/rejected": -0.5150690078735352, "step": 3311 }, { "epoch": 9.067761806981519, "grad_norm": 3.660822868347168, "learning_rate": 5.463013698630137e-07, "log_odds_chosen": 3.0386438369750977, "log_odds_ratio": -0.2547241151332855, "logits/chosen": 0.8801804184913635, "logits/rejected": 0.8569455146789551, "logps/chosen": -2.577439069747925, "logps/rejected": -5.481346130371094, "loss": 0.6905, "nll_loss": 0.6649925112724304, "rewards/accuracies": 0.875, "rewards/chosen": -0.2577439248561859, "rewards/margins": 0.2903907001018524, "rewards/rejected": -0.5481346845626831, "step": 3312 }, { "epoch": 9.070499657768652, "grad_norm": 3.1232569217681885, "learning_rate": 5.461643835616439e-07, "log_odds_chosen": 4.703372955322266, "log_odds_ratio": -0.06765579432249069, "logits/chosen": 1.044390320777893, "logits/rejected": 1.0559558868408203, "logps/chosen": -1.9513168334960938, "logps/rejected": -6.343976020812988, "loss": 0.6689, "nll_loss": 0.6621760725975037, "rewards/accuracies": 1.0, "rewards/chosen": -0.19513167440891266, "rewards/margins": 0.4392659068107605, "rewards/rejected": -0.6343976259231567, "step": 3313 }, { "epoch": 9.073237508555783, "grad_norm": 3.6910576820373535, "learning_rate": 5.46027397260274e-07, "log_odds_chosen": 1.913638710975647, "log_odds_ratio": -0.32600340247154236, "logits/chosen": 0.8903759717941284, "logits/rejected": 0.9522223472595215, "logps/chosen": -1.7524011135101318, "logps/rejected": -3.513929843902588, "loss": 0.624, "nll_loss": 0.5913960337638855, "rewards/accuracies": 0.875, "rewards/chosen": -0.17524012923240662, "rewards/margins": 0.17615285515785217, "rewards/rejected": -0.3513929843902588, "step": 3314 }, { "epoch": 9.075975359342916, "grad_norm": 3.3622701168060303, "learning_rate": 5.458904109589041e-07, "log_odds_chosen": 1.9636602401733398, "log_odds_ratio": -0.25656554102897644, "logits/chosen": 1.1883213520050049, "logits/rejected": 1.1957110166549683, "logps/chosen": -1.9645847082138062, "logps/rejected": -3.8187475204467773, "loss": 0.5944, "nll_loss": 0.5687693357467651, "rewards/accuracies": 1.0, "rewards/chosen": -0.19645845890045166, "rewards/margins": 0.1854162961244583, "rewards/rejected": -0.3818747401237488, "step": 3315 }, { "epoch": 9.078713210130047, "grad_norm": 3.7839667797088623, "learning_rate": 5.457534246575343e-07, "log_odds_chosen": 1.7700958251953125, "log_odds_ratio": -0.3863682448863983, "logits/chosen": 0.8896175026893616, "logits/rejected": 0.8734090328216553, "logps/chosen": -1.7740858793258667, "logps/rejected": -3.411308526992798, "loss": 0.5612, "nll_loss": 0.5226113200187683, "rewards/accuracies": 0.875, "rewards/chosen": -0.1774086058139801, "rewards/margins": 0.16372227668762207, "rewards/rejected": -0.3411308526992798, "step": 3316 }, { "epoch": 9.08145106091718, "grad_norm": 7.096570014953613, "learning_rate": 5.456164383561644e-07, "log_odds_chosen": 2.543978452682495, "log_odds_ratio": -0.5508525371551514, "logits/chosen": 0.9176338911056519, "logits/rejected": 0.8861865997314453, "logps/chosen": -2.308500051498413, "logps/rejected": -4.604330062866211, "loss": 0.6193, "nll_loss": 0.5641764998435974, "rewards/accuracies": 0.75, "rewards/chosen": -0.2308499962091446, "rewards/margins": 0.2295830100774765, "rewards/rejected": -0.4604330062866211, "step": 3317 }, { "epoch": 9.084188911704311, "grad_norm": 5.3742241859436035, "learning_rate": 5.454794520547945e-07, "log_odds_chosen": 3.0382509231567383, "log_odds_ratio": -0.1880139261484146, "logits/chosen": 1.0889160633087158, "logits/rejected": 1.100069284439087, "logps/chosen": -2.132941961288452, "logps/rejected": -5.030997276306152, "loss": 0.6675, "nll_loss": 0.6486749649047852, "rewards/accuracies": 0.875, "rewards/chosen": -0.21329420804977417, "rewards/margins": 0.28980550169944763, "rewards/rejected": -0.5030996799468994, "step": 3318 }, { "epoch": 9.086926762491444, "grad_norm": 3.9375245571136475, "learning_rate": 5.453424657534247e-07, "log_odds_chosen": 1.815120816230774, "log_odds_ratio": -0.232933908700943, "logits/chosen": 0.9624731540679932, "logits/rejected": 0.9743567109107971, "logps/chosen": -2.233851909637451, "logps/rejected": -3.878023862838745, "loss": 0.5783, "nll_loss": 0.5549867749214172, "rewards/accuracies": 1.0, "rewards/chosen": -0.22338517010211945, "rewards/margins": 0.16441720724105835, "rewards/rejected": -0.3878023624420166, "step": 3319 }, { "epoch": 9.089664613278575, "grad_norm": 3.258901357650757, "learning_rate": 5.452054794520548e-07, "log_odds_chosen": 2.9307661056518555, "log_odds_ratio": -0.1925438940525055, "logits/chosen": 1.285454273223877, "logits/rejected": 1.286238193511963, "logps/chosen": -1.5755654573440552, "logps/rejected": -4.285013198852539, "loss": 0.5122, "nll_loss": 0.49298951029777527, "rewards/accuracies": 1.0, "rewards/chosen": -0.15755654871463776, "rewards/margins": 0.2709447741508484, "rewards/rejected": -0.42850130796432495, "step": 3320 }, { "epoch": 9.092402464065708, "grad_norm": 3.5484304428100586, "learning_rate": 5.45068493150685e-07, "log_odds_chosen": 1.607657551765442, "log_odds_ratio": -0.3799285292625427, "logits/chosen": 0.7394143342971802, "logits/rejected": 0.7321004271507263, "logps/chosen": -1.9755847454071045, "logps/rejected": -3.506117343902588, "loss": 0.6495, "nll_loss": 0.6114811301231384, "rewards/accuracies": 0.875, "rewards/chosen": -0.19755849242210388, "rewards/margins": 0.15305326879024506, "rewards/rejected": -0.35061174631118774, "step": 3321 }, { "epoch": 9.095140314852841, "grad_norm": 3.423670530319214, "learning_rate": 5.44931506849315e-07, "log_odds_chosen": 2.08919358253479, "log_odds_ratio": -0.4786437153816223, "logits/chosen": 0.8830126523971558, "logits/rejected": 0.8546374440193176, "logps/chosen": -2.1788132190704346, "logps/rejected": -4.147693634033203, "loss": 0.662, "nll_loss": 0.6141500473022461, "rewards/accuracies": 0.875, "rewards/chosen": -0.21788130700588226, "rewards/margins": 0.19688807427883148, "rewards/rejected": -0.41476935148239136, "step": 3322 }, { "epoch": 9.097878165639973, "grad_norm": 4.659367084503174, "learning_rate": 5.447945205479452e-07, "log_odds_chosen": 1.7030428647994995, "log_odds_ratio": -0.38159745931625366, "logits/chosen": 0.8735060691833496, "logits/rejected": 0.8215179443359375, "logps/chosen": -2.26320481300354, "logps/rejected": -3.8976309299468994, "loss": 0.7184, "nll_loss": 0.6802424192428589, "rewards/accuracies": 0.875, "rewards/chosen": -0.22632049024105072, "rewards/margins": 0.16344258189201355, "rewards/rejected": -0.38976308703422546, "step": 3323 }, { "epoch": 9.100616016427105, "grad_norm": 5.8993000984191895, "learning_rate": 5.446575342465754e-07, "log_odds_chosen": 1.8512730598449707, "log_odds_ratio": -0.2038261741399765, "logits/chosen": 1.0031101703643799, "logits/rejected": 0.9116536378860474, "logps/chosen": -1.8969964981079102, "logps/rejected": -3.559450149536133, "loss": 0.688, "nll_loss": 0.667618453502655, "rewards/accuracies": 1.0, "rewards/chosen": -0.18969964981079102, "rewards/margins": 0.16624537110328674, "rewards/rejected": -0.35594505071640015, "step": 3324 }, { "epoch": 9.103353867214237, "grad_norm": 3.0128862857818604, "learning_rate": 5.445205479452054e-07, "log_odds_chosen": 2.9228551387786865, "log_odds_ratio": -0.14451190829277039, "logits/chosen": 0.9855471849441528, "logits/rejected": 0.9959975481033325, "logps/chosen": -2.193695068359375, "logps/rejected": -4.957573890686035, "loss": 0.6135, "nll_loss": 0.5990180373191833, "rewards/accuracies": 0.875, "rewards/chosen": -0.21936950087547302, "rewards/margins": 0.27638790011405945, "rewards/rejected": -0.49575740098953247, "step": 3325 }, { "epoch": 9.10609171800137, "grad_norm": 3.257143020629883, "learning_rate": 5.443835616438356e-07, "log_odds_chosen": 1.8812291622161865, "log_odds_ratio": -0.3065911531448364, "logits/chosen": 0.9605851769447327, "logits/rejected": 0.8881454467773438, "logps/chosen": -2.0062942504882812, "logps/rejected": -3.794142007827759, "loss": 0.6562, "nll_loss": 0.625520646572113, "rewards/accuracies": 0.75, "rewards/chosen": -0.20062944293022156, "rewards/margins": 0.1787847876548767, "rewards/rejected": -0.3794142007827759, "step": 3326 }, { "epoch": 9.1088295687885, "grad_norm": 3.670395612716675, "learning_rate": 5.442465753424658e-07, "log_odds_chosen": 2.202190399169922, "log_odds_ratio": -0.2814074754714966, "logits/chosen": 0.8462380170822144, "logits/rejected": 0.7797849178314209, "logps/chosen": -1.7900183200836182, "logps/rejected": -3.8333091735839844, "loss": 0.5886, "nll_loss": 0.5604528188705444, "rewards/accuracies": 0.875, "rewards/chosen": -0.1790018379688263, "rewards/margins": 0.20432907342910767, "rewards/rejected": -0.38333091139793396, "step": 3327 }, { "epoch": 9.111567419575634, "grad_norm": 2.8822154998779297, "learning_rate": 5.441095890410959e-07, "log_odds_chosen": 4.045384407043457, "log_odds_ratio": -0.1862182468175888, "logits/chosen": 0.9785466194152832, "logits/rejected": 0.9894278049468994, "logps/chosen": -2.0475940704345703, "logps/rejected": -5.970844268798828, "loss": 0.6286, "nll_loss": 0.6099439263343811, "rewards/accuracies": 1.0, "rewards/chosen": -0.204759418964386, "rewards/margins": 0.3923250138759613, "rewards/rejected": -0.5970844030380249, "step": 3328 }, { "epoch": 9.114305270362765, "grad_norm": 4.150238513946533, "learning_rate": 5.43972602739726e-07, "log_odds_chosen": 4.050668716430664, "log_odds_ratio": -0.16777952015399933, "logits/chosen": 0.9682714939117432, "logits/rejected": 0.9380502700805664, "logps/chosen": -2.443634510040283, "logps/rejected": -6.40382194519043, "loss": 0.6828, "nll_loss": 0.6659818291664124, "rewards/accuracies": 1.0, "rewards/chosen": -0.2443634569644928, "rewards/margins": 0.39601877331733704, "rewards/rejected": -0.6403822302818298, "step": 3329 }, { "epoch": 9.117043121149898, "grad_norm": 4.014115810394287, "learning_rate": 5.438356164383562e-07, "log_odds_chosen": 0.8867362141609192, "log_odds_ratio": -0.4120681583881378, "logits/chosen": 0.9457980394363403, "logits/rejected": 0.8876475095748901, "logps/chosen": -1.3634811639785767, "logps/rejected": -2.114367723464966, "loss": 0.5608, "nll_loss": 0.5196099281311035, "rewards/accuracies": 0.875, "rewards/chosen": -0.13634811341762543, "rewards/margins": 0.07508864253759384, "rewards/rejected": -0.21143677830696106, "step": 3330 }, { "epoch": 9.119780971937029, "grad_norm": 3.660489082336426, "learning_rate": 5.436986301369863e-07, "log_odds_chosen": 2.8987009525299072, "log_odds_ratio": -0.24014133214950562, "logits/chosen": 0.7117516994476318, "logits/rejected": 0.71239173412323, "logps/chosen": -1.6510982513427734, "logps/rejected": -4.367453098297119, "loss": 0.5834, "nll_loss": 0.5594336986541748, "rewards/accuracies": 0.875, "rewards/chosen": -0.16510982811450958, "rewards/margins": 0.2716354727745056, "rewards/rejected": -0.436745285987854, "step": 3331 }, { "epoch": 9.122518822724162, "grad_norm": 3.330491304397583, "learning_rate": 5.435616438356164e-07, "log_odds_chosen": 2.0732476711273193, "log_odds_ratio": -0.2465668022632599, "logits/chosen": 0.9500974416732788, "logits/rejected": 0.8558879494667053, "logps/chosen": -1.5202834606170654, "logps/rejected": -3.411766529083252, "loss": 0.6104, "nll_loss": 0.585783839225769, "rewards/accuracies": 0.875, "rewards/chosen": -0.15202835202217102, "rewards/margins": 0.18914830684661865, "rewards/rejected": -0.3411766588687897, "step": 3332 }, { "epoch": 9.125256673511293, "grad_norm": 2.879282236099243, "learning_rate": 5.434246575342466e-07, "log_odds_chosen": 1.9684669971466064, "log_odds_ratio": -0.24519503116607666, "logits/chosen": 1.012176275253296, "logits/rejected": 0.9117081165313721, "logps/chosen": -1.3853166103363037, "logps/rejected": -3.15010404586792, "loss": 0.5716, "nll_loss": 0.5470818281173706, "rewards/accuracies": 1.0, "rewards/chosen": -0.1385316699743271, "rewards/margins": 0.17647874355316162, "rewards/rejected": -0.3150103986263275, "step": 3333 }, { "epoch": 9.127994524298426, "grad_norm": 3.377965211868286, "learning_rate": 5.432876712328767e-07, "log_odds_chosen": 1.9243133068084717, "log_odds_ratio": -0.2508823871612549, "logits/chosen": 0.6820151805877686, "logits/rejected": 0.6620516777038574, "logps/chosen": -1.520646095275879, "logps/rejected": -3.2772488594055176, "loss": 0.5793, "nll_loss": 0.5542305111885071, "rewards/accuracies": 1.0, "rewards/chosen": -0.15206460654735565, "rewards/margins": 0.17566028237342834, "rewards/rejected": -0.3277248740196228, "step": 3334 }, { "epoch": 9.130732375085557, "grad_norm": 4.133534908294678, "learning_rate": 5.431506849315069e-07, "log_odds_chosen": 1.5605065822601318, "log_odds_ratio": -0.30873584747314453, "logits/chosen": 0.9804109334945679, "logits/rejected": 0.9606085419654846, "logps/chosen": -1.9587873220443726, "logps/rejected": -3.4178292751312256, "loss": 0.5824, "nll_loss": 0.5515509247779846, "rewards/accuracies": 1.0, "rewards/chosen": -0.19587872922420502, "rewards/margins": 0.14590419828891754, "rewards/rejected": -0.34178292751312256, "step": 3335 }, { "epoch": 9.13347022587269, "grad_norm": 3.5763683319091797, "learning_rate": 5.430136986301369e-07, "log_odds_chosen": 1.9732162952423096, "log_odds_ratio": -0.3691418766975403, "logits/chosen": 0.8464210033416748, "logits/rejected": 0.7305150032043457, "logps/chosen": -1.6332080364227295, "logps/rejected": -3.4754385948181152, "loss": 0.651, "nll_loss": 0.6140813827514648, "rewards/accuracies": 0.875, "rewards/chosen": -0.16332079470157623, "rewards/margins": 0.18422307074069977, "rewards/rejected": -0.347543865442276, "step": 3336 }, { "epoch": 9.136208076659821, "grad_norm": 3.5581517219543457, "learning_rate": 5.428767123287671e-07, "log_odds_chosen": 2.6021499633789062, "log_odds_ratio": -0.2520137429237366, "logits/chosen": 0.9629417657852173, "logits/rejected": 0.9498500823974609, "logps/chosen": -2.0635809898376465, "logps/rejected": -4.471205711364746, "loss": 0.6252, "nll_loss": 0.5999824404716492, "rewards/accuracies": 0.875, "rewards/chosen": -0.20635809004306793, "rewards/margins": 0.24076250195503235, "rewards/rejected": -0.4471205472946167, "step": 3337 }, { "epoch": 9.138945927446954, "grad_norm": 3.571592092514038, "learning_rate": 5.427397260273973e-07, "log_odds_chosen": 2.349663257598877, "log_odds_ratio": -0.26176580786705017, "logits/chosen": 1.0841628313064575, "logits/rejected": 1.107540488243103, "logps/chosen": -1.8268139362335205, "logps/rejected": -3.9593939781188965, "loss": 0.5184, "nll_loss": 0.4922085404396057, "rewards/accuracies": 0.875, "rewards/chosen": -0.1826813966035843, "rewards/margins": 0.21325796842575073, "rewards/rejected": -0.3959394097328186, "step": 3338 }, { "epoch": 9.141683778234086, "grad_norm": 4.2630486488342285, "learning_rate": 5.426027397260273e-07, "log_odds_chosen": 3.2306947708129883, "log_odds_ratio": -0.20477384328842163, "logits/chosen": 0.8125680088996887, "logits/rejected": 0.8265301585197449, "logps/chosen": -1.770869255065918, "logps/rejected": -4.747705459594727, "loss": 0.6004, "nll_loss": 0.5798860788345337, "rewards/accuracies": 0.875, "rewards/chosen": -0.17708691954612732, "rewards/margins": 0.29768359661102295, "rewards/rejected": -0.47477054595947266, "step": 3339 }, { "epoch": 9.144421629021219, "grad_norm": 3.9743552207946777, "learning_rate": 5.424657534246575e-07, "log_odds_chosen": 4.352027893066406, "log_odds_ratio": -0.2474895715713501, "logits/chosen": 1.005617380142212, "logits/rejected": 1.0274313688278198, "logps/chosen": -2.9962708950042725, "logps/rejected": -7.223172187805176, "loss": 0.7243, "nll_loss": 0.6995807886123657, "rewards/accuracies": 0.875, "rewards/chosen": -0.2996270954608917, "rewards/margins": 0.42269012331962585, "rewards/rejected": -0.7223172187805176, "step": 3340 }, { "epoch": 9.14715947980835, "grad_norm": 3.806490659713745, "learning_rate": 5.423287671232877e-07, "log_odds_chosen": 1.7594074010849, "log_odds_ratio": -0.28110820055007935, "logits/chosen": 0.9624395966529846, "logits/rejected": 0.9931252002716064, "logps/chosen": -1.7812782526016235, "logps/rejected": -3.3685994148254395, "loss": 0.6304, "nll_loss": 0.6022850275039673, "rewards/accuracies": 1.0, "rewards/chosen": -0.17812782526016235, "rewards/margins": 0.15873213112354279, "rewards/rejected": -0.33685994148254395, "step": 3341 }, { "epoch": 9.149897330595483, "grad_norm": 4.895751953125, "learning_rate": 5.421917808219178e-07, "log_odds_chosen": 0.9415485262870789, "log_odds_ratio": -0.8051921129226685, "logits/chosen": 0.9623722434043884, "logits/rejected": 0.9468189477920532, "logps/chosen": -2.6679329872131348, "logps/rejected": -3.460857391357422, "loss": 0.6566, "nll_loss": 0.5760723948478699, "rewards/accuracies": 0.875, "rewards/chosen": -0.26679331064224243, "rewards/margins": 0.07929246872663498, "rewards/rejected": -0.3460857570171356, "step": 3342 }, { "epoch": 9.152635181382614, "grad_norm": 4.287942409515381, "learning_rate": 5.420547945205479e-07, "log_odds_chosen": 1.202815294265747, "log_odds_ratio": -0.6227878332138062, "logits/chosen": 0.8118240833282471, "logits/rejected": 0.7932347059249878, "logps/chosen": -2.0674896240234375, "logps/rejected": -3.223210334777832, "loss": 0.7265, "nll_loss": 0.6641920208930969, "rewards/accuracies": 0.75, "rewards/chosen": -0.20674897730350494, "rewards/margins": 0.11557203531265259, "rewards/rejected": -0.32232099771499634, "step": 3343 }, { "epoch": 9.155373032169747, "grad_norm": 3.2602975368499756, "learning_rate": 5.419178082191781e-07, "log_odds_chosen": 3.3959150314331055, "log_odds_ratio": -0.22979065775871277, "logits/chosen": 0.893070638179779, "logits/rejected": 0.8668671250343323, "logps/chosen": -1.5326073169708252, "logps/rejected": -4.571478843688965, "loss": 0.6471, "nll_loss": 0.6240929961204529, "rewards/accuracies": 0.75, "rewards/chosen": -0.153260737657547, "rewards/margins": 0.30388712882995605, "rewards/rejected": -0.45714783668518066, "step": 3344 }, { "epoch": 9.158110882956878, "grad_norm": 3.5617971420288086, "learning_rate": 5.417808219178082e-07, "log_odds_chosen": 1.685127854347229, "log_odds_ratio": -0.3842538595199585, "logits/chosen": 0.7451080083847046, "logits/rejected": 0.6573185324668884, "logps/chosen": -1.3694615364074707, "logps/rejected": -2.847018003463745, "loss": 0.5843, "nll_loss": 0.545839250087738, "rewards/accuracies": 0.875, "rewards/chosen": -0.1369461715221405, "rewards/margins": 0.14775562286376953, "rewards/rejected": -0.28470179438591003, "step": 3345 }, { "epoch": 9.160848733744011, "grad_norm": 3.3530890941619873, "learning_rate": 5.416438356164383e-07, "log_odds_chosen": 3.7873082160949707, "log_odds_ratio": -0.19078634679317474, "logits/chosen": 0.7743275761604309, "logits/rejected": 0.7602617740631104, "logps/chosen": -1.5566580295562744, "logps/rejected": -5.1555633544921875, "loss": 0.6579, "nll_loss": 0.6388145685195923, "rewards/accuracies": 1.0, "rewards/chosen": -0.1556658148765564, "rewards/margins": 0.35989052057266235, "rewards/rejected": -0.5155563354492188, "step": 3346 }, { "epoch": 9.163586584531142, "grad_norm": 3.027952194213867, "learning_rate": 5.415068493150685e-07, "log_odds_chosen": 2.4929258823394775, "log_odds_ratio": -0.18412567675113678, "logits/chosen": 0.7637594938278198, "logits/rejected": 0.7274578213691711, "logps/chosen": -1.858364462852478, "logps/rejected": -4.120161056518555, "loss": 0.6575, "nll_loss": 0.6391015648841858, "rewards/accuracies": 1.0, "rewards/chosen": -0.18583644926548004, "rewards/margins": 0.22617965936660767, "rewards/rejected": -0.4120160937309265, "step": 3347 }, { "epoch": 9.166324435318275, "grad_norm": 3.9298219680786133, "learning_rate": 5.413698630136986e-07, "log_odds_chosen": 1.5282069444656372, "log_odds_ratio": -0.330280601978302, "logits/chosen": 0.9600304365158081, "logits/rejected": 0.9616749286651611, "logps/chosen": -1.8411805629730225, "logps/rejected": -3.207797050476074, "loss": 0.5742, "nll_loss": 0.5412209630012512, "rewards/accuracies": 1.0, "rewards/chosen": -0.18411806225776672, "rewards/margins": 0.1366616189479828, "rewards/rejected": -0.3207797110080719, "step": 3348 }, { "epoch": 9.169062286105408, "grad_norm": 4.041045188903809, "learning_rate": 5.412328767123288e-07, "log_odds_chosen": 1.7666407823562622, "log_odds_ratio": -0.3287737965583801, "logits/chosen": 0.8362833261489868, "logits/rejected": 0.8045991659164429, "logps/chosen": -2.3365378379821777, "logps/rejected": -3.992591142654419, "loss": 0.643, "nll_loss": 0.610121488571167, "rewards/accuracies": 0.875, "rewards/chosen": -0.23365378379821777, "rewards/margins": 0.1656053364276886, "rewards/rejected": -0.399259090423584, "step": 3349 }, { "epoch": 9.17180013689254, "grad_norm": 3.0640738010406494, "learning_rate": 5.410958904109589e-07, "log_odds_chosen": 2.5086302757263184, "log_odds_ratio": -0.17579509317874908, "logits/chosen": 0.8587414026260376, "logits/rejected": 0.8266234397888184, "logps/chosen": -1.9378584623336792, "logps/rejected": -4.266688346862793, "loss": 0.5756, "nll_loss": 0.5579761266708374, "rewards/accuracies": 1.0, "rewards/chosen": -0.19378584623336792, "rewards/margins": 0.2328830063343048, "rewards/rejected": -0.42666885256767273, "step": 3350 }, { "epoch": 9.174537987679672, "grad_norm": 3.1035783290863037, "learning_rate": 5.40958904109589e-07, "log_odds_chosen": 2.748490333557129, "log_odds_ratio": -0.21544530987739563, "logits/chosen": 0.8413324952125549, "logits/rejected": 0.8283356428146362, "logps/chosen": -2.2920401096343994, "logps/rejected": -4.921454429626465, "loss": 0.5573, "nll_loss": 0.5358021855354309, "rewards/accuracies": 0.875, "rewards/chosen": -0.229203999042511, "rewards/margins": 0.26294147968292236, "rewards/rejected": -0.49214550852775574, "step": 3351 }, { "epoch": 9.177275838466803, "grad_norm": 3.405225992202759, "learning_rate": 5.408219178082192e-07, "log_odds_chosen": 2.567633628845215, "log_odds_ratio": -0.17426665127277374, "logits/chosen": 0.927346408367157, "logits/rejected": 0.9321151971817017, "logps/chosen": -1.9104559421539307, "logps/rejected": -4.331119537353516, "loss": 0.6005, "nll_loss": 0.5830840468406677, "rewards/accuracies": 1.0, "rewards/chosen": -0.1910456120967865, "rewards/margins": 0.24206633865833282, "rewards/rejected": -0.43311190605163574, "step": 3352 }, { "epoch": 9.180013689253936, "grad_norm": 3.6587576866149902, "learning_rate": 5.406849315068492e-07, "log_odds_chosen": 1.9440100193023682, "log_odds_ratio": -0.28062817454338074, "logits/chosen": 0.9130910634994507, "logits/rejected": 0.8385016322135925, "logps/chosen": -1.1598894596099854, "logps/rejected": -2.83931303024292, "loss": 0.5012, "nll_loss": 0.47316277027130127, "rewards/accuracies": 0.875, "rewards/chosen": -0.11598895490169525, "rewards/margins": 0.1679423600435257, "rewards/rejected": -0.28393131494522095, "step": 3353 }, { "epoch": 9.182751540041068, "grad_norm": 5.995995998382568, "learning_rate": 5.405479452054794e-07, "log_odds_chosen": 1.43717622756958, "log_odds_ratio": -0.9050799608230591, "logits/chosen": 0.7650592923164368, "logits/rejected": 0.6991522312164307, "logps/chosen": -2.203486442565918, "logps/rejected": -3.4963924884796143, "loss": 0.7091, "nll_loss": 0.6186413168907166, "rewards/accuracies": 0.625, "rewards/chosen": -0.22034865617752075, "rewards/margins": 0.1292905956506729, "rewards/rejected": -0.34963926672935486, "step": 3354 }, { "epoch": 9.1854893908282, "grad_norm": 3.452986240386963, "learning_rate": 5.404109589041096e-07, "log_odds_chosen": 1.0684726238250732, "log_odds_ratio": -0.3649545907974243, "logits/chosen": 0.7203059196472168, "logits/rejected": 0.6405408382415771, "logps/chosen": -1.7792776823043823, "logps/rejected": -2.75994873046875, "loss": 0.7134, "nll_loss": 0.6769441366195679, "rewards/accuracies": 0.875, "rewards/chosen": -0.17792776226997375, "rewards/margins": 0.09806710481643677, "rewards/rejected": -0.2759948968887329, "step": 3355 }, { "epoch": 9.188227241615332, "grad_norm": 3.4753684997558594, "learning_rate": 5.402739726027396e-07, "log_odds_chosen": 2.9331932067871094, "log_odds_ratio": -0.14291943609714508, "logits/chosen": 1.1764471530914307, "logits/rejected": 1.2482064962387085, "logps/chosen": -1.882596731185913, "logps/rejected": -4.586960315704346, "loss": 0.5768, "nll_loss": 0.5624657869338989, "rewards/accuracies": 1.0, "rewards/chosen": -0.18825969099998474, "rewards/margins": 0.2704363465309143, "rewards/rejected": -0.45869600772857666, "step": 3356 }, { "epoch": 9.190965092402465, "grad_norm": 3.2196273803710938, "learning_rate": 5.401369863013698e-07, "log_odds_chosen": 3.843018054962158, "log_odds_ratio": -0.09780731052160263, "logits/chosen": 0.9090140461921692, "logits/rejected": 0.9053571224212646, "logps/chosen": -1.684200644493103, "logps/rejected": -5.278979301452637, "loss": 0.4919, "nll_loss": 0.48207682371139526, "rewards/accuracies": 1.0, "rewards/chosen": -0.16842007637023926, "rewards/margins": 0.3594779372215271, "rewards/rejected": -0.5278979539871216, "step": 3357 }, { "epoch": 9.193702943189596, "grad_norm": 4.3290228843688965, "learning_rate": 5.4e-07, "log_odds_chosen": 3.3572654724121094, "log_odds_ratio": -0.25855642557144165, "logits/chosen": 1.0119068622589111, "logits/rejected": 0.9861478805541992, "logps/chosen": -2.0069420337677, "logps/rejected": -5.215235710144043, "loss": 0.6564, "nll_loss": 0.6305850744247437, "rewards/accuracies": 0.875, "rewards/chosen": -0.20069420337677002, "rewards/margins": 0.3208293914794922, "rewards/rejected": -0.5215235948562622, "step": 3358 }, { "epoch": 9.196440793976729, "grad_norm": 2.8104240894317627, "learning_rate": 5.398630136986301e-07, "log_odds_chosen": 2.0537054538726807, "log_odds_ratio": -0.24270713329315186, "logits/chosen": 0.9725971221923828, "logits/rejected": 0.9206840991973877, "logps/chosen": -1.4318084716796875, "logps/rejected": -3.235170364379883, "loss": 0.627, "nll_loss": 0.6027147769927979, "rewards/accuracies": 1.0, "rewards/chosen": -0.14318084716796875, "rewards/margins": 0.18033620715141296, "rewards/rejected": -0.3235170543193817, "step": 3359 }, { "epoch": 9.19917864476386, "grad_norm": 3.8111894130706787, "learning_rate": 5.397260273972602e-07, "log_odds_chosen": 2.4011528491973877, "log_odds_ratio": -0.1526973843574524, "logits/chosen": 1.0849186182022095, "logits/rejected": 1.0659798383712769, "logps/chosen": -2.0504400730133057, "logps/rejected": -4.258697032928467, "loss": 0.6599, "nll_loss": 0.6445838212966919, "rewards/accuracies": 1.0, "rewards/chosen": -0.20504401624202728, "rewards/margins": 0.2208256870508194, "rewards/rejected": -0.4258697032928467, "step": 3360 }, { "epoch": 9.201916495550993, "grad_norm": 3.3001837730407715, "learning_rate": 5.395890410958904e-07, "log_odds_chosen": 3.5864086151123047, "log_odds_ratio": -0.10724249482154846, "logits/chosen": 1.0982155799865723, "logits/rejected": 1.1237950325012207, "logps/chosen": -1.7904698848724365, "logps/rejected": -5.133669853210449, "loss": 0.6292, "nll_loss": 0.6185206770896912, "rewards/accuracies": 1.0, "rewards/chosen": -0.17904698848724365, "rewards/margins": 0.3343200087547302, "rewards/rejected": -0.5133669972419739, "step": 3361 }, { "epoch": 9.204654346338124, "grad_norm": 3.3333969116210938, "learning_rate": 5.394520547945205e-07, "log_odds_chosen": 2.8310365676879883, "log_odds_ratio": -0.2521302103996277, "logits/chosen": 0.9683330655097961, "logits/rejected": 1.002960205078125, "logps/chosen": -2.104710578918457, "logps/rejected": -4.813039302825928, "loss": 0.6556, "nll_loss": 0.6304144859313965, "rewards/accuracies": 1.0, "rewards/chosen": -0.21047106385231018, "rewards/margins": 0.2708328664302826, "rewards/rejected": -0.4813039302825928, "step": 3362 }, { "epoch": 9.207392197125257, "grad_norm": 3.1055901050567627, "learning_rate": 5.393150684931506e-07, "log_odds_chosen": 2.8386592864990234, "log_odds_ratio": -0.12639063596725464, "logits/chosen": 1.092659592628479, "logits/rejected": 1.0917773246765137, "logps/chosen": -1.6533328294754028, "logps/rejected": -4.2677412033081055, "loss": 0.5377, "nll_loss": 0.5250138640403748, "rewards/accuracies": 1.0, "rewards/chosen": -0.16533327102661133, "rewards/margins": 0.26144087314605713, "rewards/rejected": -0.42677414417266846, "step": 3363 }, { "epoch": 9.210130047912388, "grad_norm": 3.0126280784606934, "learning_rate": 5.391780821917808e-07, "log_odds_chosen": 3.7906408309936523, "log_odds_ratio": -0.041346266865730286, "logits/chosen": 1.064434289932251, "logits/rejected": 1.0595927238464355, "logps/chosen": -1.7623381614685059, "logps/rejected": -5.330781936645508, "loss": 0.659, "nll_loss": 0.6548195481300354, "rewards/accuracies": 1.0, "rewards/chosen": -0.17623382806777954, "rewards/margins": 0.35684439539909363, "rewards/rejected": -0.5330781936645508, "step": 3364 }, { "epoch": 9.212867898699521, "grad_norm": 3.9038686752319336, "learning_rate": 5.390410958904109e-07, "log_odds_chosen": 1.7457603216171265, "log_odds_ratio": -0.4096331000328064, "logits/chosen": 0.8133735656738281, "logits/rejected": 0.7580312490463257, "logps/chosen": -2.240920305252075, "logps/rejected": -3.8836233615875244, "loss": 0.6502, "nll_loss": 0.6092653274536133, "rewards/accuracies": 0.75, "rewards/chosen": -0.224092036485672, "rewards/margins": 0.1642703115940094, "rewards/rejected": -0.3883623480796814, "step": 3365 }, { "epoch": 9.215605749486652, "grad_norm": 3.1152026653289795, "learning_rate": 5.389041095890411e-07, "log_odds_chosen": 2.647135019302368, "log_odds_ratio": -0.24129751324653625, "logits/chosen": 0.8383409380912781, "logits/rejected": 0.8265389800071716, "logps/chosen": -1.961033582687378, "logps/rejected": -4.462703704833984, "loss": 0.6373, "nll_loss": 0.6132186055183411, "rewards/accuracies": 1.0, "rewards/chosen": -0.19610336422920227, "rewards/margins": 0.25016695261001587, "rewards/rejected": -0.44627031683921814, "step": 3366 }, { "epoch": 9.218343600273785, "grad_norm": 5.037170886993408, "learning_rate": 5.387671232876711e-07, "log_odds_chosen": 1.3200409412384033, "log_odds_ratio": -0.6243707537651062, "logits/chosen": 0.8698416948318481, "logits/rejected": 0.9138810634613037, "logps/chosen": -2.010340690612793, "logps/rejected": -3.138591766357422, "loss": 0.684, "nll_loss": 0.6215569376945496, "rewards/accuracies": 0.75, "rewards/chosen": -0.2010340839624405, "rewards/margins": 0.11282512545585632, "rewards/rejected": -0.3138591945171356, "step": 3367 }, { "epoch": 9.221081451060916, "grad_norm": 3.492640733718872, "learning_rate": 5.386301369863013e-07, "log_odds_chosen": 3.216657876968384, "log_odds_ratio": -0.23749947547912598, "logits/chosen": 0.9202291965484619, "logits/rejected": 0.9756662249565125, "logps/chosen": -2.034856081008911, "logps/rejected": -5.112126350402832, "loss": 0.7337, "nll_loss": 0.7099580764770508, "rewards/accuracies": 0.875, "rewards/chosen": -0.2034856230020523, "rewards/margins": 0.30772703886032104, "rewards/rejected": -0.5112126469612122, "step": 3368 }, { "epoch": 9.22381930184805, "grad_norm": 10.409313201904297, "learning_rate": 5.384931506849315e-07, "log_odds_chosen": 1.5276604890823364, "log_odds_ratio": -0.780982494354248, "logits/chosen": 1.1813145875930786, "logits/rejected": 1.207440733909607, "logps/chosen": -3.448005437850952, "logps/rejected": -4.772947788238525, "loss": 0.6824, "nll_loss": 0.6043370366096497, "rewards/accuracies": 0.625, "rewards/chosen": -0.34480053186416626, "rewards/margins": 0.132494255900383, "rewards/rejected": -0.47729480266571045, "step": 3369 }, { "epoch": 9.22655715263518, "grad_norm": 4.495461940765381, "learning_rate": 5.383561643835615e-07, "log_odds_chosen": 3.796637535095215, "log_odds_ratio": -0.5286115407943726, "logits/chosen": 0.9051353931427002, "logits/rejected": 0.9922537207603455, "logps/chosen": -2.50187087059021, "logps/rejected": -6.196690559387207, "loss": 0.8184, "nll_loss": 0.765541136264801, "rewards/accuracies": 0.875, "rewards/chosen": -0.25018709897994995, "rewards/margins": 0.36948198080062866, "rewards/rejected": -0.6196690201759338, "step": 3370 }, { "epoch": 9.229295003422314, "grad_norm": 3.32883358001709, "learning_rate": 5.382191780821917e-07, "log_odds_chosen": 1.7541524171829224, "log_odds_ratio": -0.35100287199020386, "logits/chosen": 0.9317870736122131, "logits/rejected": 0.8378462791442871, "logps/chosen": -1.8163036108016968, "logps/rejected": -3.456012725830078, "loss": 0.7187, "nll_loss": 0.6835876107215881, "rewards/accuracies": 0.875, "rewards/chosen": -0.18163037300109863, "rewards/margins": 0.1639709174633026, "rewards/rejected": -0.34560126066207886, "step": 3371 }, { "epoch": 9.232032854209445, "grad_norm": 5.041818618774414, "learning_rate": 5.380821917808219e-07, "log_odds_chosen": 3.811309814453125, "log_odds_ratio": -0.1762048900127411, "logits/chosen": 0.8711597919464111, "logits/rejected": 0.8413194417953491, "logps/chosen": -1.8734982013702393, "logps/rejected": -5.512877464294434, "loss": 0.6855, "nll_loss": 0.667876660823822, "rewards/accuracies": 1.0, "rewards/chosen": -0.1873498260974884, "rewards/margins": 0.36393797397613525, "rewards/rejected": -0.551287829875946, "step": 3372 }, { "epoch": 9.234770704996578, "grad_norm": 3.331552267074585, "learning_rate": 5.37945205479452e-07, "log_odds_chosen": 1.8156298398971558, "log_odds_ratio": -0.24086827039718628, "logits/chosen": 0.6396729350090027, "logits/rejected": 0.5869552493095398, "logps/chosen": -1.8774054050445557, "logps/rejected": -3.553783416748047, "loss": 0.5876, "nll_loss": 0.5635550022125244, "rewards/accuracies": 1.0, "rewards/chosen": -0.18774054944515228, "rewards/margins": 0.16763779520988464, "rewards/rejected": -0.35537832975387573, "step": 3373 }, { "epoch": 9.23750855578371, "grad_norm": 3.4369864463806152, "learning_rate": 5.378082191780821e-07, "log_odds_chosen": 1.3827917575836182, "log_odds_ratio": -0.31877559423446655, "logits/chosen": 1.068096399307251, "logits/rejected": 1.055736780166626, "logps/chosen": -2.7483317852020264, "logps/rejected": -4.083652973175049, "loss": 0.6167, "nll_loss": 0.5848113298416138, "rewards/accuracies": 0.875, "rewards/chosen": -0.27483317255973816, "rewards/margins": 0.1335320919752121, "rewards/rejected": -0.40836527943611145, "step": 3374 }, { "epoch": 9.240246406570842, "grad_norm": 3.5513436794281006, "learning_rate": 5.376712328767123e-07, "log_odds_chosen": 1.6204566955566406, "log_odds_ratio": -0.24625900387763977, "logits/chosen": 0.6942952275276184, "logits/rejected": 0.6122692823410034, "logps/chosen": -1.6575818061828613, "logps/rejected": -3.0741968154907227, "loss": 0.5905, "nll_loss": 0.5659083724021912, "rewards/accuracies": 1.0, "rewards/chosen": -0.1657581776380539, "rewards/margins": 0.14166152477264404, "rewards/rejected": -0.30741971731185913, "step": 3375 }, { "epoch": 9.242984257357975, "grad_norm": 3.604979991912842, "learning_rate": 5.375342465753424e-07, "log_odds_chosen": 1.6461917161941528, "log_odds_ratio": -0.3285771608352661, "logits/chosen": 0.7606713771820068, "logits/rejected": 0.7104904055595398, "logps/chosen": -1.468237280845642, "logps/rejected": -2.949810028076172, "loss": 0.6144, "nll_loss": 0.5815907716751099, "rewards/accuracies": 1.0, "rewards/chosen": -0.1468237340450287, "rewards/margins": 0.1481572836637497, "rewards/rejected": -0.2949810028076172, "step": 3376 }, { "epoch": 9.245722108145106, "grad_norm": 3.725220203399658, "learning_rate": 5.373972602739725e-07, "log_odds_chosen": 3.0971579551696777, "log_odds_ratio": -0.34407883882522583, "logits/chosen": 0.8164688944816589, "logits/rejected": 0.7995404005050659, "logps/chosen": -2.2325501441955566, "logps/rejected": -5.224771499633789, "loss": 0.6364, "nll_loss": 0.6019736528396606, "rewards/accuracies": 0.75, "rewards/chosen": -0.2232550084590912, "rewards/margins": 0.2992221713066101, "rewards/rejected": -0.5224771499633789, "step": 3377 }, { "epoch": 9.248459958932239, "grad_norm": 3.3784382343292236, "learning_rate": 5.372602739726027e-07, "log_odds_chosen": 1.5122981071472168, "log_odds_ratio": -0.3954687714576721, "logits/chosen": 0.7842470407485962, "logits/rejected": 0.7508481740951538, "logps/chosen": -1.4662599563598633, "logps/rejected": -2.854487657546997, "loss": 0.639, "nll_loss": 0.5994750261306763, "rewards/accuracies": 0.875, "rewards/chosen": -0.14662599563598633, "rewards/margins": 0.1388227641582489, "rewards/rejected": -0.28544875979423523, "step": 3378 }, { "epoch": 9.25119780971937, "grad_norm": 4.162207126617432, "learning_rate": 5.371232876712328e-07, "log_odds_chosen": 1.662316918373108, "log_odds_ratio": -0.340982049703598, "logits/chosen": 0.9303057193756104, "logits/rejected": 0.9247678518295288, "logps/chosen": -1.761864423751831, "logps/rejected": -3.2745320796966553, "loss": 0.6014, "nll_loss": 0.5673340559005737, "rewards/accuracies": 0.875, "rewards/chosen": -0.1761864423751831, "rewards/margins": 0.15126678347587585, "rewards/rejected": -0.32745322585105896, "step": 3379 }, { "epoch": 9.253935660506503, "grad_norm": 2.9618849754333496, "learning_rate": 5.36986301369863e-07, "log_odds_chosen": 3.7276437282562256, "log_odds_ratio": -0.1528874635696411, "logits/chosen": 0.9584117531776428, "logits/rejected": 0.9731073379516602, "logps/chosen": -1.8989408016204834, "logps/rejected": -5.4814653396606445, "loss": 0.5836, "nll_loss": 0.5682684779167175, "rewards/accuracies": 1.0, "rewards/chosen": -0.18989408016204834, "rewards/margins": 0.3582524359226227, "rewards/rejected": -0.5481464862823486, "step": 3380 }, { "epoch": 9.256673511293634, "grad_norm": 4.2035298347473145, "learning_rate": 5.368493150684931e-07, "log_odds_chosen": 2.9372024536132812, "log_odds_ratio": -0.21862174570560455, "logits/chosen": 1.2945475578308105, "logits/rejected": 1.329972267150879, "logps/chosen": -1.9389230012893677, "logps/rejected": -4.731378555297852, "loss": 0.5684, "nll_loss": 0.5465612411499023, "rewards/accuracies": 0.875, "rewards/chosen": -0.19389230012893677, "rewards/margins": 0.2792455554008484, "rewards/rejected": -0.47313785552978516, "step": 3381 }, { "epoch": 9.259411362080767, "grad_norm": 3.946303367614746, "learning_rate": 5.367123287671232e-07, "log_odds_chosen": 1.5362484455108643, "log_odds_ratio": -0.34957122802734375, "logits/chosen": 0.84697425365448, "logits/rejected": 0.7793505787849426, "logps/chosen": -1.4345932006835938, "logps/rejected": -2.8234899044036865, "loss": 0.5766, "nll_loss": 0.541663646697998, "rewards/accuracies": 1.0, "rewards/chosen": -0.14345932006835938, "rewards/margins": 0.13888968527317047, "rewards/rejected": -0.28234899044036865, "step": 3382 }, { "epoch": 9.262149212867898, "grad_norm": 3.917484760284424, "learning_rate": 5.365753424657534e-07, "log_odds_chosen": 1.314176321029663, "log_odds_ratio": -0.41111135482788086, "logits/chosen": 0.844700813293457, "logits/rejected": 0.7967285513877869, "logps/chosen": -1.7844468355178833, "logps/rejected": -2.9470365047454834, "loss": 0.5482, "nll_loss": 0.5070607662200928, "rewards/accuracies": 0.875, "rewards/chosen": -0.17844468355178833, "rewards/margins": 0.11625897139310837, "rewards/rejected": -0.2947036623954773, "step": 3383 }, { "epoch": 9.264887063655031, "grad_norm": 4.893893718719482, "learning_rate": 5.364383561643834e-07, "log_odds_chosen": 2.4392526149749756, "log_odds_ratio": -0.2620013356208801, "logits/chosen": 0.3700979948043823, "logits/rejected": 0.4097566604614258, "logps/chosen": -2.294616222381592, "logps/rejected": -4.5675458908081055, "loss": 0.7597, "nll_loss": 0.7335143685340881, "rewards/accuracies": 0.875, "rewards/chosen": -0.22946161031723022, "rewards/margins": 0.227292999625206, "rewards/rejected": -0.456754595041275, "step": 3384 }, { "epoch": 9.267624914442163, "grad_norm": 4.464822769165039, "learning_rate": 5.363013698630136e-07, "log_odds_chosen": 1.3471084833145142, "log_odds_ratio": -0.4363385736942291, "logits/chosen": 1.0096068382263184, "logits/rejected": 0.9888906478881836, "logps/chosen": -1.6167501211166382, "logps/rejected": -2.749168634414673, "loss": 0.5286, "nll_loss": 0.4850146770477295, "rewards/accuracies": 0.75, "rewards/chosen": -0.16167500615119934, "rewards/margins": 0.11324185132980347, "rewards/rejected": -0.2749168574810028, "step": 3385 }, { "epoch": 9.270362765229295, "grad_norm": 5.887195110321045, "learning_rate": 5.361643835616439e-07, "log_odds_chosen": 4.4864606857299805, "log_odds_ratio": -0.3798811137676239, "logits/chosen": 1.214073896408081, "logits/rejected": 1.2288544178009033, "logps/chosen": -3.1819944381713867, "logps/rejected": -7.606023788452148, "loss": 0.7433, "nll_loss": 0.7053481340408325, "rewards/accuracies": 0.875, "rewards/chosen": -0.3181994557380676, "rewards/margins": 0.4424029588699341, "rewards/rejected": -0.7606024146080017, "step": 3386 }, { "epoch": 9.273100616016427, "grad_norm": 3.1175448894500732, "learning_rate": 5.36027397260274e-07, "log_odds_chosen": 2.1104390621185303, "log_odds_ratio": -0.3317210078239441, "logits/chosen": 0.6942943930625916, "logits/rejected": 0.6785601377487183, "logps/chosen": -2.320204734802246, "logps/rejected": -4.363595008850098, "loss": 0.6464, "nll_loss": 0.6131981015205383, "rewards/accuracies": 0.875, "rewards/chosen": -0.23202046751976013, "rewards/margins": 0.20433905720710754, "rewards/rejected": -0.4363594949245453, "step": 3387 }, { "epoch": 9.27583846680356, "grad_norm": 5.232205390930176, "learning_rate": 5.35890410958904e-07, "log_odds_chosen": 2.247056484222412, "log_odds_ratio": -0.24913518130779266, "logits/chosen": 0.9783352613449097, "logits/rejected": 0.9892021417617798, "logps/chosen": -2.3285071849823, "logps/rejected": -4.4592390060424805, "loss": 0.6266, "nll_loss": 0.6016385555267334, "rewards/accuracies": 0.875, "rewards/chosen": -0.23285073041915894, "rewards/margins": 0.21307314932346344, "rewards/rejected": -0.44592392444610596, "step": 3388 }, { "epoch": 9.27857631759069, "grad_norm": 6.138767242431641, "learning_rate": 5.357534246575343e-07, "log_odds_chosen": 0.4794977903366089, "log_odds_ratio": -0.6574952602386475, "logits/chosen": 0.7689872980117798, "logits/rejected": 0.712600827217102, "logps/chosen": -1.8659766912460327, "logps/rejected": -2.2058234214782715, "loss": 0.6792, "nll_loss": 0.6134343147277832, "rewards/accuracies": 0.5, "rewards/chosen": -0.18659767508506775, "rewards/margins": 0.033984676003456116, "rewards/rejected": -0.22058235108852386, "step": 3389 }, { "epoch": 9.281314168377824, "grad_norm": 4.327081680297852, "learning_rate": 5.356164383561644e-07, "log_odds_chosen": 2.0727524757385254, "log_odds_ratio": -0.37309297919273376, "logits/chosen": 0.8388286232948303, "logits/rejected": 0.8648011684417725, "logps/chosen": -2.1357202529907227, "logps/rejected": -3.997938394546509, "loss": 0.5658, "nll_loss": 0.5284477472305298, "rewards/accuracies": 0.875, "rewards/chosen": -0.21357204020023346, "rewards/margins": 0.18622179329395294, "rewards/rejected": -0.399793803691864, "step": 3390 }, { "epoch": 9.284052019164955, "grad_norm": 3.14315128326416, "learning_rate": 5.354794520547945e-07, "log_odds_chosen": 3.336146593093872, "log_odds_ratio": -0.09848971664905548, "logits/chosen": 1.0408692359924316, "logits/rejected": 1.071754813194275, "logps/chosen": -1.8392829895019531, "logps/rejected": -4.971181869506836, "loss": 0.6411, "nll_loss": 0.6312680840492249, "rewards/accuracies": 1.0, "rewards/chosen": -0.18392831087112427, "rewards/margins": 0.31318992376327515, "rewards/rejected": -0.4971182346343994, "step": 3391 }, { "epoch": 9.286789869952088, "grad_norm": 3.9070229530334473, "learning_rate": 5.353424657534247e-07, "log_odds_chosen": 2.2933645248413086, "log_odds_ratio": -0.25056949257850647, "logits/chosen": 1.099162220954895, "logits/rejected": 1.1050115823745728, "logps/chosen": -2.13883638381958, "logps/rejected": -4.146543025970459, "loss": 0.6778, "nll_loss": 0.6527533531188965, "rewards/accuracies": 0.875, "rewards/chosen": -0.213883638381958, "rewards/margins": 0.20077070593833923, "rewards/rejected": -0.41465434432029724, "step": 3392 }, { "epoch": 9.289527720739219, "grad_norm": 7.055717468261719, "learning_rate": 5.352054794520548e-07, "log_odds_chosen": 2.6089541912078857, "log_odds_ratio": -0.570244312286377, "logits/chosen": 0.760281503200531, "logits/rejected": 0.7313016653060913, "logps/chosen": -2.478247880935669, "logps/rejected": -4.94106912612915, "loss": 0.6987, "nll_loss": 0.6416526436805725, "rewards/accuracies": 0.75, "rewards/chosen": -0.2478247880935669, "rewards/margins": 0.246282160282135, "rewards/rejected": -0.4941069483757019, "step": 3393 }, { "epoch": 9.292265571526352, "grad_norm": 3.3816542625427246, "learning_rate": 5.35068493150685e-07, "log_odds_chosen": 2.401503562927246, "log_odds_ratio": -0.34675025939941406, "logits/chosen": 0.8760242462158203, "logits/rejected": 0.8092063665390015, "logps/chosen": -2.062811851501465, "logps/rejected": -4.294861793518066, "loss": 0.6706, "nll_loss": 0.6358919143676758, "rewards/accuracies": 0.875, "rewards/chosen": -0.2062811702489853, "rewards/margins": 0.22320502996444702, "rewards/rejected": -0.4294861853122711, "step": 3394 }, { "epoch": 9.295003422313483, "grad_norm": 3.875694513320923, "learning_rate": 5.349315068493151e-07, "log_odds_chosen": 2.176391363143921, "log_odds_ratio": -0.15300369262695312, "logits/chosen": 1.0291255712509155, "logits/rejected": 1.0799115896224976, "logps/chosen": -2.185368776321411, "logps/rejected": -4.231996536254883, "loss": 0.6616, "nll_loss": 0.6462801098823547, "rewards/accuracies": 1.0, "rewards/chosen": -0.2185368835926056, "rewards/margins": 0.20466278493404388, "rewards/rejected": -0.4231996536254883, "step": 3395 }, { "epoch": 9.297741273100616, "grad_norm": 3.6963014602661133, "learning_rate": 5.347945205479452e-07, "log_odds_chosen": 2.0202622413635254, "log_odds_ratio": -0.2743864059448242, "logits/chosen": 0.9130564332008362, "logits/rejected": 0.914951503276825, "logps/chosen": -1.9358720779418945, "logps/rejected": -3.8341829776763916, "loss": 0.6599, "nll_loss": 0.6324459314346313, "rewards/accuracies": 0.875, "rewards/chosen": -0.19358721375465393, "rewards/margins": 0.18983110785484314, "rewards/rejected": -0.38341832160949707, "step": 3396 }, { "epoch": 9.300479123887747, "grad_norm": 3.9169890880584717, "learning_rate": 5.346575342465754e-07, "log_odds_chosen": 2.84022855758667, "log_odds_ratio": -0.18839050829410553, "logits/chosen": 1.2183527946472168, "logits/rejected": 1.259536623954773, "logps/chosen": -3.3538575172424316, "logps/rejected": -6.12739896774292, "loss": 0.6537, "nll_loss": 0.6348567605018616, "rewards/accuracies": 1.0, "rewards/chosen": -0.3353857398033142, "rewards/margins": 0.2773541212081909, "rewards/rejected": -0.6127399206161499, "step": 3397 }, { "epoch": 9.30321697467488, "grad_norm": 3.6314685344696045, "learning_rate": 5.345205479452054e-07, "log_odds_chosen": 1.7271591424942017, "log_odds_ratio": -0.3559606075286865, "logits/chosen": 0.7366719841957092, "logits/rejected": 0.5821110606193542, "logps/chosen": -1.746058464050293, "logps/rejected": -3.357450246810913, "loss": 0.6968, "nll_loss": 0.6612433791160583, "rewards/accuracies": 1.0, "rewards/chosen": -0.1746058613061905, "rewards/margins": 0.16113916039466858, "rewards/rejected": -0.33574503660202026, "step": 3398 }, { "epoch": 9.305954825462011, "grad_norm": 3.2002053260803223, "learning_rate": 5.343835616438356e-07, "log_odds_chosen": 3.2977209091186523, "log_odds_ratio": -0.11279615014791489, "logits/chosen": 0.9041345119476318, "logits/rejected": 0.8920197486877441, "logps/chosen": -1.8452885150909424, "logps/rejected": -4.973793983459473, "loss": 0.652, "nll_loss": 0.6406998634338379, "rewards/accuracies": 1.0, "rewards/chosen": -0.18452885746955872, "rewards/margins": 0.31285056471824646, "rewards/rejected": -0.4973794221878052, "step": 3399 }, { "epoch": 9.308692676249144, "grad_norm": 5.226543426513672, "learning_rate": 5.342465753424658e-07, "log_odds_chosen": 2.3194332122802734, "log_odds_ratio": -0.3690764904022217, "logits/chosen": 0.8548688888549805, "logits/rejected": 0.7738686800003052, "logps/chosen": -1.9863243103027344, "logps/rejected": -4.082436561584473, "loss": 0.6967, "nll_loss": 0.6598082780838013, "rewards/accuracies": 0.875, "rewards/chosen": -0.19863244891166687, "rewards/margins": 0.2096112072467804, "rewards/rejected": -0.40824365615844727, "step": 3400 }, { "epoch": 9.311430527036277, "grad_norm": 5.204769611358643, "learning_rate": 5.341095890410959e-07, "log_odds_chosen": 1.856166124343872, "log_odds_ratio": -0.7174224853515625, "logits/chosen": 0.8358653783798218, "logits/rejected": 0.8207400441169739, "logps/chosen": -2.853278875350952, "logps/rejected": -4.678750038146973, "loss": 0.737, "nll_loss": 0.6652387976646423, "rewards/accuracies": 0.75, "rewards/chosen": -0.28532788157463074, "rewards/margins": 0.18254712224006653, "rewards/rejected": -0.46787503361701965, "step": 3401 }, { "epoch": 9.314168377823409, "grad_norm": 3.26607346534729, "learning_rate": 5.33972602739726e-07, "log_odds_chosen": 3.5796666145324707, "log_odds_ratio": -0.1302274465560913, "logits/chosen": 1.0460247993469238, "logits/rejected": 1.0525031089782715, "logps/chosen": -1.7398724555969238, "logps/rejected": -5.108818054199219, "loss": 0.5551, "nll_loss": 0.5420931577682495, "rewards/accuracies": 1.0, "rewards/chosen": -0.1739872694015503, "rewards/margins": 0.33689454197883606, "rewards/rejected": -0.510881781578064, "step": 3402 }, { "epoch": 9.316906228610542, "grad_norm": 5.661319255828857, "learning_rate": 5.338356164383562e-07, "log_odds_chosen": 2.193363904953003, "log_odds_ratio": -0.353763222694397, "logits/chosen": 1.1803836822509766, "logits/rejected": 1.1538747549057007, "logps/chosen": -2.0696144104003906, "logps/rejected": -4.087944984436035, "loss": 0.6, "nll_loss": 0.5646033883094788, "rewards/accuracies": 0.75, "rewards/chosen": -0.20696145296096802, "rewards/margins": 0.20183305442333221, "rewards/rejected": -0.40879449248313904, "step": 3403 }, { "epoch": 9.319644079397673, "grad_norm": 3.3325061798095703, "learning_rate": 5.336986301369863e-07, "log_odds_chosen": 2.0601823329925537, "log_odds_ratio": -0.3442794680595398, "logits/chosen": 0.8694354295730591, "logits/rejected": 0.9390404224395752, "logps/chosen": -1.732041358947754, "logps/rejected": -3.688148021697998, "loss": 0.6043, "nll_loss": 0.5698702931404114, "rewards/accuracies": 0.75, "rewards/chosen": -0.17320413887500763, "rewards/margins": 0.1956106424331665, "rewards/rejected": -0.3688147962093353, "step": 3404 }, { "epoch": 9.322381930184806, "grad_norm": 3.584113597869873, "learning_rate": 5.335616438356164e-07, "log_odds_chosen": 3.296145439147949, "log_odds_ratio": -0.3742000162601471, "logits/chosen": 0.8779364824295044, "logits/rejected": 0.9310404062271118, "logps/chosen": -2.578376531600952, "logps/rejected": -5.716507911682129, "loss": 0.7214, "nll_loss": 0.6839654445648193, "rewards/accuracies": 0.875, "rewards/chosen": -0.2578376829624176, "rewards/margins": 0.31381312012672424, "rewards/rejected": -0.5716508030891418, "step": 3405 }, { "epoch": 9.325119780971937, "grad_norm": 2.835909366607666, "learning_rate": 5.334246575342466e-07, "log_odds_chosen": 3.0976977348327637, "log_odds_ratio": -0.17605990171432495, "logits/chosen": 0.8172354698181152, "logits/rejected": 0.8258805274963379, "logps/chosen": -2.132814407348633, "logps/rejected": -5.113600730895996, "loss": 0.6124, "nll_loss": 0.5948293209075928, "rewards/accuracies": 1.0, "rewards/chosen": -0.21328143775463104, "rewards/margins": 0.29807862639427185, "rewards/rejected": -0.5113600492477417, "step": 3406 }, { "epoch": 9.32785763175907, "grad_norm": 3.1642677783966064, "learning_rate": 5.332876712328767e-07, "log_odds_chosen": 2.5848236083984375, "log_odds_ratio": -0.16965842247009277, "logits/chosen": 0.942733645439148, "logits/rejected": 0.9010008573532104, "logps/chosen": -1.903529405593872, "logps/rejected": -4.3187713623046875, "loss": 0.6145, "nll_loss": 0.5974926948547363, "rewards/accuracies": 1.0, "rewards/chosen": -0.19035294651985168, "rewards/margins": 0.24152420461177826, "rewards/rejected": -0.43187713623046875, "step": 3407 }, { "epoch": 9.330595482546201, "grad_norm": 3.9643337726593018, "learning_rate": 5.331506849315069e-07, "log_odds_chosen": 1.8888477087020874, "log_odds_ratio": -0.3171805739402771, "logits/chosen": 0.6575380563735962, "logits/rejected": 0.6145698428153992, "logps/chosen": -1.5820915699005127, "logps/rejected": -3.287625551223755, "loss": 0.637, "nll_loss": 0.6052836179733276, "rewards/accuracies": 1.0, "rewards/chosen": -0.15820914506912231, "rewards/margins": 0.17055341601371765, "rewards/rejected": -0.32876259088516235, "step": 3408 }, { "epoch": 9.333333333333334, "grad_norm": 4.197971343994141, "learning_rate": 5.33013698630137e-07, "log_odds_chosen": 2.7423369884490967, "log_odds_ratio": -0.3283679485321045, "logits/chosen": 1.095492959022522, "logits/rejected": 1.0968317985534668, "logps/chosen": -2.2431931495666504, "logps/rejected": -4.863314628601074, "loss": 0.5918, "nll_loss": 0.5589263439178467, "rewards/accuracies": 0.875, "rewards/chosen": -0.22431930899620056, "rewards/margins": 0.26201215386390686, "rewards/rejected": -0.4863314628601074, "step": 3409 }, { "epoch": 9.336071184120465, "grad_norm": 3.0199270248413086, "learning_rate": 5.328767123287671e-07, "log_odds_chosen": 3.1859371662139893, "log_odds_ratio": -0.18073274195194244, "logits/chosen": 1.0834519863128662, "logits/rejected": 1.1252411603927612, "logps/chosen": -2.0265724658966064, "logps/rejected": -5.075833320617676, "loss": 0.5969, "nll_loss": 0.5788689851760864, "rewards/accuracies": 1.0, "rewards/chosen": -0.20265725255012512, "rewards/margins": 0.3049261271953583, "rewards/rejected": -0.5075833797454834, "step": 3410 }, { "epoch": 9.338809034907598, "grad_norm": 2.9093868732452393, "learning_rate": 5.327397260273973e-07, "log_odds_chosen": 2.6994903087615967, "log_odds_ratio": -0.11593621969223022, "logits/chosen": 0.870812177658081, "logits/rejected": 0.8157920837402344, "logps/chosen": -1.8185818195343018, "logps/rejected": -4.315218448638916, "loss": 0.5495, "nll_loss": 0.5378671884536743, "rewards/accuracies": 1.0, "rewards/chosen": -0.18185816705226898, "rewards/margins": 0.24966368079185486, "rewards/rejected": -0.43152186274528503, "step": 3411 }, { "epoch": 9.34154688569473, "grad_norm": 4.251179218292236, "learning_rate": 5.326027397260274e-07, "log_odds_chosen": 2.849196434020996, "log_odds_ratio": -0.32125845551490784, "logits/chosen": 0.9772698283195496, "logits/rejected": 1.002010464668274, "logps/chosen": -2.1417341232299805, "logps/rejected": -4.874688148498535, "loss": 0.6464, "nll_loss": 0.6143227815628052, "rewards/accuracies": 0.875, "rewards/chosen": -0.21417340636253357, "rewards/margins": 0.27329543232917786, "rewards/rejected": -0.4874688386917114, "step": 3412 }, { "epoch": 9.344284736481862, "grad_norm": 3.1786015033721924, "learning_rate": 5.324657534246575e-07, "log_odds_chosen": 2.7699270248413086, "log_odds_ratio": -0.19163163006305695, "logits/chosen": 1.0801548957824707, "logits/rejected": 1.1459708213806152, "logps/chosen": -2.181857109069824, "logps/rejected": -4.8402628898620605, "loss": 0.6274, "nll_loss": 0.6081900000572205, "rewards/accuracies": 0.875, "rewards/chosen": -0.21818572282791138, "rewards/margins": 0.2658405900001526, "rewards/rejected": -0.48402631282806396, "step": 3413 }, { "epoch": 9.347022587268993, "grad_norm": 3.5858750343322754, "learning_rate": 5.323287671232877e-07, "log_odds_chosen": 2.9461371898651123, "log_odds_ratio": -0.28704333305358887, "logits/chosen": 0.6137067079544067, "logits/rejected": 0.5718797445297241, "logps/chosen": -1.79355788230896, "logps/rejected": -4.608421802520752, "loss": 0.7537, "nll_loss": 0.7250441908836365, "rewards/accuracies": 1.0, "rewards/chosen": -0.17935580015182495, "rewards/margins": 0.2814863920211792, "rewards/rejected": -0.46084219217300415, "step": 3414 }, { "epoch": 9.349760438056126, "grad_norm": 3.556373357772827, "learning_rate": 5.321917808219178e-07, "log_odds_chosen": 2.032406806945801, "log_odds_ratio": -0.2663414180278778, "logits/chosen": 1.189922571182251, "logits/rejected": 1.1854157447814941, "logps/chosen": -2.0862510204315186, "logps/rejected": -4.032308578491211, "loss": 0.5834, "nll_loss": 0.5567961931228638, "rewards/accuracies": 0.875, "rewards/chosen": -0.20862510800361633, "rewards/margins": 0.1946057677268982, "rewards/rejected": -0.40323084592819214, "step": 3415 }, { "epoch": 9.352498288843258, "grad_norm": 3.415525436401367, "learning_rate": 5.320547945205479e-07, "log_odds_chosen": 2.304619312286377, "log_odds_ratio": -0.2629315257072449, "logits/chosen": 0.8202916383743286, "logits/rejected": 0.7719476222991943, "logps/chosen": -1.679922103881836, "logps/rejected": -3.829873561859131, "loss": 0.5445, "nll_loss": 0.518174409866333, "rewards/accuracies": 0.875, "rewards/chosen": -0.1679922193288803, "rewards/margins": 0.2149951457977295, "rewards/rejected": -0.382987380027771, "step": 3416 }, { "epoch": 9.35523613963039, "grad_norm": 3.2664237022399902, "learning_rate": 5.319178082191781e-07, "log_odds_chosen": 2.865114212036133, "log_odds_ratio": -0.14453616738319397, "logits/chosen": 0.9228479266166687, "logits/rejected": 0.9642207622528076, "logps/chosen": -2.252455234527588, "logps/rejected": -4.915493011474609, "loss": 0.5896, "nll_loss": 0.5751136541366577, "rewards/accuracies": 1.0, "rewards/chosen": -0.22524553537368774, "rewards/margins": 0.26630374789237976, "rewards/rejected": -0.4915493130683899, "step": 3417 }, { "epoch": 9.357973990417522, "grad_norm": 4.481335163116455, "learning_rate": 5.317808219178082e-07, "log_odds_chosen": 2.9451656341552734, "log_odds_ratio": -0.21477538347244263, "logits/chosen": 0.9313344955444336, "logits/rejected": 1.0013620853424072, "logps/chosen": -2.5649733543395996, "logps/rejected": -5.398048400878906, "loss": 0.7739, "nll_loss": 0.7524352669715881, "rewards/accuracies": 1.0, "rewards/chosen": -0.2564973533153534, "rewards/margins": 0.2833074927330017, "rewards/rejected": -0.5398048162460327, "step": 3418 }, { "epoch": 9.360711841204655, "grad_norm": 3.0670619010925293, "learning_rate": 5.316438356164383e-07, "log_odds_chosen": 3.2592291831970215, "log_odds_ratio": -0.17533352971076965, "logits/chosen": 1.1954995393753052, "logits/rejected": 1.1394550800323486, "logps/chosen": -2.0325825214385986, "logps/rejected": -5.133856773376465, "loss": 0.5853, "nll_loss": 0.5677321553230286, "rewards/accuracies": 1.0, "rewards/chosen": -0.20325826108455658, "rewards/margins": 0.3101274073123932, "rewards/rejected": -0.5133857131004333, "step": 3419 }, { "epoch": 9.363449691991786, "grad_norm": 3.0401248931884766, "learning_rate": 5.315068493150685e-07, "log_odds_chosen": 3.802971839904785, "log_odds_ratio": -0.26375091075897217, "logits/chosen": 1.0421147346496582, "logits/rejected": 1.0933231115341187, "logps/chosen": -1.8670363426208496, "logps/rejected": -5.577457904815674, "loss": 0.6818, "nll_loss": 0.6554696559906006, "rewards/accuracies": 0.875, "rewards/chosen": -0.1867036372423172, "rewards/margins": 0.3710421919822693, "rewards/rejected": -0.5577458143234253, "step": 3420 }, { "epoch": 9.366187542778919, "grad_norm": 4.239124774932861, "learning_rate": 5.313698630136986e-07, "log_odds_chosen": 2.323711395263672, "log_odds_ratio": -0.2622939944267273, "logits/chosen": 1.0744537115097046, "logits/rejected": 1.044602870941162, "logps/chosen": -1.6345064640045166, "logps/rejected": -3.807738780975342, "loss": 0.5792, "nll_loss": 0.5529584884643555, "rewards/accuracies": 0.875, "rewards/chosen": -0.16345065832138062, "rewards/margins": 0.21732322871685028, "rewards/rejected": -0.3807738721370697, "step": 3421 }, { "epoch": 9.36892539356605, "grad_norm": 5.398218154907227, "learning_rate": 5.312328767123288e-07, "log_odds_chosen": 3.1813902854919434, "log_odds_ratio": -0.42401736974716187, "logits/chosen": 0.9012802243232727, "logits/rejected": 0.8663787841796875, "logps/chosen": -1.7669275999069214, "logps/rejected": -4.589520454406738, "loss": 0.7138, "nll_loss": 0.6714394092559814, "rewards/accuracies": 0.875, "rewards/chosen": -0.17669276893138885, "rewards/margins": 0.2822593152523041, "rewards/rejected": -0.45895206928253174, "step": 3422 }, { "epoch": 9.371663244353183, "grad_norm": 3.35764217376709, "learning_rate": 5.310958904109589e-07, "log_odds_chosen": 2.793574810028076, "log_odds_ratio": -0.13756360113620758, "logits/chosen": 1.085833191871643, "logits/rejected": 0.9671871662139893, "logps/chosen": -1.9054101705551147, "logps/rejected": -4.518404006958008, "loss": 0.6275, "nll_loss": 0.6137558817863464, "rewards/accuracies": 1.0, "rewards/chosen": -0.19054101407527924, "rewards/margins": 0.26129937171936035, "rewards/rejected": -0.4518403708934784, "step": 3423 }, { "epoch": 9.374401095140314, "grad_norm": 3.6588737964630127, "learning_rate": 5.30958904109589e-07, "log_odds_chosen": 2.369572639465332, "log_odds_ratio": -0.35783398151397705, "logits/chosen": 0.7347270846366882, "logits/rejected": 0.8151317834854126, "logps/chosen": -1.870661735534668, "logps/rejected": -3.989774703979492, "loss": 0.5737, "nll_loss": 0.537936806678772, "rewards/accuracies": 0.875, "rewards/chosen": -0.18706616759300232, "rewards/margins": 0.21191127598285675, "rewards/rejected": -0.39897745847702026, "step": 3424 }, { "epoch": 9.377138945927447, "grad_norm": 5.198365211486816, "learning_rate": 5.308219178082192e-07, "log_odds_chosen": 4.127358436584473, "log_odds_ratio": -0.4880370795726776, "logits/chosen": 1.0012671947479248, "logits/rejected": 1.003082275390625, "logps/chosen": -2.3921210765838623, "logps/rejected": -6.328673839569092, "loss": 0.6478, "nll_loss": 0.5989710688591003, "rewards/accuracies": 0.875, "rewards/chosen": -0.23921211063861847, "rewards/margins": 0.39365530014038086, "rewards/rejected": -0.6328674554824829, "step": 3425 }, { "epoch": 9.37987679671458, "grad_norm": 5.603088855743408, "learning_rate": 5.306849315068493e-07, "log_odds_chosen": 4.150872230529785, "log_odds_ratio": -0.40623536705970764, "logits/chosen": 0.8171234130859375, "logits/rejected": 0.7955007553100586, "logps/chosen": -1.7874906063079834, "logps/rejected": -5.790177822113037, "loss": 0.7179, "nll_loss": 0.6772729158401489, "rewards/accuracies": 0.75, "rewards/chosen": -0.17874906957149506, "rewards/margins": 0.40026870369911194, "rewards/rejected": -0.5790177583694458, "step": 3426 }, { "epoch": 9.382614647501711, "grad_norm": 2.913982391357422, "learning_rate": 5.305479452054794e-07, "log_odds_chosen": 2.8383073806762695, "log_odds_ratio": -0.18559938669204712, "logits/chosen": 0.9777724742889404, "logits/rejected": 0.9807121157646179, "logps/chosen": -1.6635671854019165, "logps/rejected": -4.322963237762451, "loss": 0.6255, "nll_loss": 0.6069744825363159, "rewards/accuracies": 1.0, "rewards/chosen": -0.16635671257972717, "rewards/margins": 0.2659396529197693, "rewards/rejected": -0.4322963356971741, "step": 3427 }, { "epoch": 9.385352498288844, "grad_norm": 3.320974588394165, "learning_rate": 5.304109589041096e-07, "log_odds_chosen": 3.296142101287842, "log_odds_ratio": -0.23120896518230438, "logits/chosen": 0.7706462740898132, "logits/rejected": 0.7739183902740479, "logps/chosen": -2.025885581970215, "logps/rejected": -5.197910308837891, "loss": 0.6338, "nll_loss": 0.61069256067276, "rewards/accuracies": 1.0, "rewards/chosen": -0.20258855819702148, "rewards/margins": 0.31720244884490967, "rewards/rejected": -0.5197910070419312, "step": 3428 }, { "epoch": 9.388090349075975, "grad_norm": 5.079037189483643, "learning_rate": 5.302739726027396e-07, "log_odds_chosen": 2.356567859649658, "log_odds_ratio": -0.25496095418930054, "logits/chosen": 0.7521680593490601, "logits/rejected": 0.7376003861427307, "logps/chosen": -2.106915235519409, "logps/rejected": -4.279755115509033, "loss": 0.6527, "nll_loss": 0.6272061467170715, "rewards/accuracies": 1.0, "rewards/chosen": -0.21069151163101196, "rewards/margins": 0.21728403866291046, "rewards/rejected": -0.4279755651950836, "step": 3429 }, { "epoch": 9.390828199863108, "grad_norm": 3.4502618312835693, "learning_rate": 5.301369863013698e-07, "log_odds_chosen": 2.2935352325439453, "log_odds_ratio": -0.17042675614356995, "logits/chosen": 0.8849713206291199, "logits/rejected": 0.851516842842102, "logps/chosen": -1.7346937656402588, "logps/rejected": -3.8763928413391113, "loss": 0.5438, "nll_loss": 0.5267347097396851, "rewards/accuracies": 1.0, "rewards/chosen": -0.17346937954425812, "rewards/margins": 0.21416990458965302, "rewards/rejected": -0.38763928413391113, "step": 3430 }, { "epoch": 9.39356605065024, "grad_norm": 4.449252605438232, "learning_rate": 5.3e-07, "log_odds_chosen": 3.74480938911438, "log_odds_ratio": -0.21296656131744385, "logits/chosen": 1.1573424339294434, "logits/rejected": 1.199187159538269, "logps/chosen": -2.0893213748931885, "logps/rejected": -5.651592254638672, "loss": 0.6328, "nll_loss": 0.6114773154258728, "rewards/accuracies": 0.875, "rewards/chosen": -0.20893214643001556, "rewards/margins": 0.3562270998954773, "rewards/rejected": -0.565159261226654, "step": 3431 }, { "epoch": 9.396303901437372, "grad_norm": 5.588107585906982, "learning_rate": 5.298630136986301e-07, "log_odds_chosen": 1.5597894191741943, "log_odds_ratio": -0.3084447383880615, "logits/chosen": 0.9770399332046509, "logits/rejected": 0.9314766526222229, "logps/chosen": -2.2956085205078125, "logps/rejected": -3.70215106010437, "loss": 0.7513, "nll_loss": 0.7204501032829285, "rewards/accuracies": 0.875, "rewards/chosen": -0.22956085205078125, "rewards/margins": 0.14065425097942352, "rewards/rejected": -0.37021511793136597, "step": 3432 }, { "epoch": 9.399041752224504, "grad_norm": 6.389007568359375, "learning_rate": 5.297260273972602e-07, "log_odds_chosen": 1.5880588293075562, "log_odds_ratio": -0.2930380702018738, "logits/chosen": 0.7625789642333984, "logits/rejected": 0.7135507464408875, "logps/chosen": -2.2749714851379395, "logps/rejected": -3.7733795642852783, "loss": 0.6076, "nll_loss": 0.5783030986785889, "rewards/accuracies": 0.875, "rewards/chosen": -0.2274971604347229, "rewards/margins": 0.1498408019542694, "rewards/rejected": -0.3773379325866699, "step": 3433 }, { "epoch": 9.401779603011637, "grad_norm": 3.464317560195923, "learning_rate": 5.295890410958904e-07, "log_odds_chosen": 3.452223777770996, "log_odds_ratio": -0.20453135669231415, "logits/chosen": 0.7865806818008423, "logits/rejected": 0.7790319919586182, "logps/chosen": -2.7224366664886475, "logps/rejected": -6.117250442504883, "loss": 0.6881, "nll_loss": 0.6676774621009827, "rewards/accuracies": 1.0, "rewards/chosen": -0.2722437083721161, "rewards/margins": 0.3394813537597656, "rewards/rejected": -0.6117250919342041, "step": 3434 }, { "epoch": 9.404517453798768, "grad_norm": 3.524484157562256, "learning_rate": 5.294520547945205e-07, "log_odds_chosen": 1.9826905727386475, "log_odds_ratio": -0.3839144706726074, "logits/chosen": 0.8920291662216187, "logits/rejected": 0.8354583978652954, "logps/chosen": -1.571065902709961, "logps/rejected": -3.4460811614990234, "loss": 0.5799, "nll_loss": 0.5415089130401611, "rewards/accuracies": 0.875, "rewards/chosen": -0.15710657835006714, "rewards/margins": 0.18750151991844177, "rewards/rejected": -0.3446080982685089, "step": 3435 }, { "epoch": 9.4072553045859, "grad_norm": 4.092460632324219, "learning_rate": 5.293150684931507e-07, "log_odds_chosen": 1.548895001411438, "log_odds_ratio": -0.4224753677845001, "logits/chosen": 0.8971620202064514, "logits/rejected": 0.8434618711471558, "logps/chosen": -1.8763816356658936, "logps/rejected": -3.275390625, "loss": 0.6802, "nll_loss": 0.6379445195198059, "rewards/accuracies": 0.875, "rewards/chosen": -0.18763817846775055, "rewards/margins": 0.13990090787410736, "rewards/rejected": -0.3275390863418579, "step": 3436 }, { "epoch": 9.409993155373032, "grad_norm": 3.1400465965270996, "learning_rate": 5.291780821917808e-07, "log_odds_chosen": 2.8256173133850098, "log_odds_ratio": -0.2778159976005554, "logits/chosen": 0.9163347482681274, "logits/rejected": 0.8218772411346436, "logps/chosen": -1.5257898569107056, "logps/rejected": -4.1377997398376465, "loss": 0.6168, "nll_loss": 0.5890204906463623, "rewards/accuracies": 1.0, "rewards/chosen": -0.15257897973060608, "rewards/margins": 0.2612009644508362, "rewards/rejected": -0.41377997398376465, "step": 3437 }, { "epoch": 9.412731006160165, "grad_norm": 3.0412864685058594, "learning_rate": 5.290410958904109e-07, "log_odds_chosen": 2.6284046173095703, "log_odds_ratio": -0.18316328525543213, "logits/chosen": 1.116772174835205, "logits/rejected": 1.1739426851272583, "logps/chosen": -1.8832951784133911, "logps/rejected": -4.268436908721924, "loss": 0.5436, "nll_loss": 0.5252827405929565, "rewards/accuracies": 1.0, "rewards/chosen": -0.18832950294017792, "rewards/margins": 0.23851418495178223, "rewards/rejected": -0.42684367299079895, "step": 3438 }, { "epoch": 9.415468856947296, "grad_norm": 3.7103068828582764, "learning_rate": 5.289041095890411e-07, "log_odds_chosen": 2.6448943614959717, "log_odds_ratio": -0.1874082088470459, "logits/chosen": 1.0537137985229492, "logits/rejected": 1.1073898077011108, "logps/chosen": -2.632847785949707, "logps/rejected": -5.161369800567627, "loss": 0.5907, "nll_loss": 0.571996808052063, "rewards/accuracies": 1.0, "rewards/chosen": -0.2632848024368286, "rewards/margins": 0.252852201461792, "rewards/rejected": -0.5161370038986206, "step": 3439 }, { "epoch": 9.418206707734429, "grad_norm": 3.521928310394287, "learning_rate": 5.287671232876712e-07, "log_odds_chosen": 4.110116481781006, "log_odds_ratio": -0.06859492510557175, "logits/chosen": 1.0431759357452393, "logits/rejected": 1.007300853729248, "logps/chosen": -1.8025743961334229, "logps/rejected": -5.731917858123779, "loss": 0.6261, "nll_loss": 0.6192310452461243, "rewards/accuracies": 1.0, "rewards/chosen": -0.18025745451450348, "rewards/margins": 0.39293432235717773, "rewards/rejected": -0.5731918215751648, "step": 3440 }, { "epoch": 9.42094455852156, "grad_norm": 4.615338325500488, "learning_rate": 5.286301369863013e-07, "log_odds_chosen": 2.3247761726379395, "log_odds_ratio": -0.4789539575576782, "logits/chosen": 1.051416277885437, "logits/rejected": 1.049617052078247, "logps/chosen": -2.1431336402893066, "logps/rejected": -4.203930377960205, "loss": 0.7965, "nll_loss": 0.7486156225204468, "rewards/accuracies": 0.875, "rewards/chosen": -0.2143133580684662, "rewards/margins": 0.20607969164848328, "rewards/rejected": -0.42039304971694946, "step": 3441 }, { "epoch": 9.423682409308693, "grad_norm": 3.502093553543091, "learning_rate": 5.284931506849315e-07, "log_odds_chosen": 1.1563758850097656, "log_odds_ratio": -0.3678120970726013, "logits/chosen": 1.070847988128662, "logits/rejected": 1.0595403909683228, "logps/chosen": -1.4233243465423584, "logps/rejected": -2.4292452335357666, "loss": 0.5652, "nll_loss": 0.5283885598182678, "rewards/accuracies": 1.0, "rewards/chosen": -0.14233243465423584, "rewards/margins": 0.10059210658073425, "rewards/rejected": -0.2429245412349701, "step": 3442 }, { "epoch": 9.426420260095824, "grad_norm": 3.804622173309326, "learning_rate": 5.283561643835615e-07, "log_odds_chosen": 3.1787338256835938, "log_odds_ratio": -0.2874438762664795, "logits/chosen": 1.0091276168823242, "logits/rejected": 0.9777886271476746, "logps/chosen": -2.4417967796325684, "logps/rejected": -5.548753261566162, "loss": 0.7272, "nll_loss": 0.6984415054321289, "rewards/accuracies": 0.875, "rewards/chosen": -0.24417968094348907, "rewards/margins": 0.31069567799568176, "rewards/rejected": -0.554875373840332, "step": 3443 }, { "epoch": 9.429158110882957, "grad_norm": 3.4611916542053223, "learning_rate": 5.282191780821917e-07, "log_odds_chosen": 2.8355865478515625, "log_odds_ratio": -0.1578882932662964, "logits/chosen": 1.023400902748108, "logits/rejected": 1.0539300441741943, "logps/chosen": -2.0404772758483887, "logps/rejected": -4.723513603210449, "loss": 0.5991, "nll_loss": 0.5832658410072327, "rewards/accuracies": 1.0, "rewards/chosen": -0.20404773950576782, "rewards/margins": 0.26830363273620605, "rewards/rejected": -0.4723513722419739, "step": 3444 }, { "epoch": 9.431895961670088, "grad_norm": 7.102338790893555, "learning_rate": 5.280821917808219e-07, "log_odds_chosen": 1.8957529067993164, "log_odds_ratio": -0.38074466586112976, "logits/chosen": 0.9458776712417603, "logits/rejected": 0.9923914074897766, "logps/chosen": -2.2202396392822266, "logps/rejected": -3.920097589492798, "loss": 0.6548, "nll_loss": 0.6166974306106567, "rewards/accuracies": 0.75, "rewards/chosen": -0.22202397882938385, "rewards/margins": 0.16998577117919922, "rewards/rejected": -0.3920097351074219, "step": 3445 }, { "epoch": 9.434633812457221, "grad_norm": 3.339437961578369, "learning_rate": 5.27945205479452e-07, "log_odds_chosen": 3.5007619857788086, "log_odds_ratio": -0.10228459537029266, "logits/chosen": 0.9181252121925354, "logits/rejected": 0.8918395638465881, "logps/chosen": -1.7576823234558105, "logps/rejected": -5.058935165405273, "loss": 0.5742, "nll_loss": 0.563983678817749, "rewards/accuracies": 1.0, "rewards/chosen": -0.17576825618743896, "rewards/margins": 0.33012527227401733, "rewards/rejected": -0.5058935284614563, "step": 3446 }, { "epoch": 9.437371663244353, "grad_norm": 4.23444938659668, "learning_rate": 5.278082191780821e-07, "log_odds_chosen": 1.836299180984497, "log_odds_ratio": -0.3834382891654968, "logits/chosen": 1.0447412729263306, "logits/rejected": 1.1347013711929321, "logps/chosen": -2.809138536453247, "logps/rejected": -4.582207679748535, "loss": 0.8119, "nll_loss": 0.7735278606414795, "rewards/accuracies": 0.875, "rewards/chosen": -0.2809138596057892, "rewards/margins": 0.1773069053888321, "rewards/rejected": -0.45822077989578247, "step": 3447 }, { "epoch": 9.440109514031485, "grad_norm": 3.4256393909454346, "learning_rate": 5.276712328767123e-07, "log_odds_chosen": 1.8611046075820923, "log_odds_ratio": -0.23387767374515533, "logits/chosen": 0.9227491617202759, "logits/rejected": 0.9695956110954285, "logps/chosen": -2.0959396362304688, "logps/rejected": -3.837827205657959, "loss": 0.6246, "nll_loss": 0.6012110710144043, "rewards/accuracies": 1.0, "rewards/chosen": -0.2095939666032791, "rewards/margins": 0.1741887480020523, "rewards/rejected": -0.3837827444076538, "step": 3448 }, { "epoch": 9.442847364818617, "grad_norm": 6.244010925292969, "learning_rate": 5.275342465753424e-07, "log_odds_chosen": 1.6723387241363525, "log_odds_ratio": -0.366013765335083, "logits/chosen": 1.046007752418518, "logits/rejected": 1.0281810760498047, "logps/chosen": -2.433696746826172, "logps/rejected": -3.891692876815796, "loss": 0.5498, "nll_loss": 0.5131683349609375, "rewards/accuracies": 0.875, "rewards/chosen": -0.2433696687221527, "rewards/margins": 0.14579959213733673, "rewards/rejected": -0.389169305562973, "step": 3449 }, { "epoch": 9.44558521560575, "grad_norm": 4.164787769317627, "learning_rate": 5.273972602739725e-07, "log_odds_chosen": 2.2446699142456055, "log_odds_ratio": -0.3202916979789734, "logits/chosen": 0.8918237090110779, "logits/rejected": 0.8600876927375793, "logps/chosen": -2.28166127204895, "logps/rejected": -4.410030364990234, "loss": 0.581, "nll_loss": 0.5489578247070312, "rewards/accuracies": 0.875, "rewards/chosen": -0.2281661480665207, "rewards/margins": 0.2128368616104126, "rewards/rejected": -0.4410029947757721, "step": 3450 }, { "epoch": 9.44832306639288, "grad_norm": 3.198807716369629, "learning_rate": 5.272602739726027e-07, "log_odds_chosen": 2.72285795211792, "log_odds_ratio": -0.16946198046207428, "logits/chosen": 1.1091041564941406, "logits/rejected": 1.1133919954299927, "logps/chosen": -1.3666912317276, "logps/rejected": -3.800480842590332, "loss": 0.5662, "nll_loss": 0.5492696762084961, "rewards/accuracies": 1.0, "rewards/chosen": -0.1366691291332245, "rewards/margins": 0.24337896704673767, "rewards/rejected": -0.38004806637763977, "step": 3451 }, { "epoch": 9.451060917180014, "grad_norm": 4.147932052612305, "learning_rate": 5.271232876712328e-07, "log_odds_chosen": 1.4548134803771973, "log_odds_ratio": -0.3556291162967682, "logits/chosen": 1.0269683599472046, "logits/rejected": 1.01990807056427, "logps/chosen": -2.3998963832855225, "logps/rejected": -3.7497177124023438, "loss": 0.6495, "nll_loss": 0.6139059066772461, "rewards/accuracies": 0.75, "rewards/chosen": -0.23998963832855225, "rewards/margins": 0.1349821388721466, "rewards/rejected": -0.37497180700302124, "step": 3452 }, { "epoch": 9.453798767967147, "grad_norm": 3.1517581939697266, "learning_rate": 5.26986301369863e-07, "log_odds_chosen": 3.873771905899048, "log_odds_ratio": -0.1102987602353096, "logits/chosen": 1.0245342254638672, "logits/rejected": 1.0236012935638428, "logps/chosen": -1.8014013767242432, "logps/rejected": -5.501652717590332, "loss": 0.5753, "nll_loss": 0.5642276406288147, "rewards/accuracies": 1.0, "rewards/chosen": -0.18014013767242432, "rewards/margins": 0.3700251877307892, "rewards/rejected": -0.5501652956008911, "step": 3453 }, { "epoch": 9.456536618754278, "grad_norm": 3.5957953929901123, "learning_rate": 5.268493150684931e-07, "log_odds_chosen": 1.6220030784606934, "log_odds_ratio": -0.23665887117385864, "logits/chosen": 0.7851158380508423, "logits/rejected": 0.7798218727111816, "logps/chosen": -2.212100028991699, "logps/rejected": -3.7087862491607666, "loss": 0.5807, "nll_loss": 0.5570305585861206, "rewards/accuracies": 1.0, "rewards/chosen": -0.22120998799800873, "rewards/margins": 0.1496686488389969, "rewards/rejected": -0.3708786368370056, "step": 3454 }, { "epoch": 9.45927446954141, "grad_norm": 4.023687362670898, "learning_rate": 5.267123287671232e-07, "log_odds_chosen": 2.35550594329834, "log_odds_ratio": -0.3179599940776825, "logits/chosen": 1.11589515209198, "logits/rejected": 1.088352918624878, "logps/chosen": -1.9529696702957153, "logps/rejected": -3.9972047805786133, "loss": 0.6934, "nll_loss": 0.6615803837776184, "rewards/accuracies": 0.875, "rewards/chosen": -0.195296972990036, "rewards/margins": 0.20442351698875427, "rewards/rejected": -0.3997204899787903, "step": 3455 }, { "epoch": 9.462012320328542, "grad_norm": 6.710028648376465, "learning_rate": 5.265753424657534e-07, "log_odds_chosen": 1.5664008855819702, "log_odds_ratio": -0.5185480713844299, "logits/chosen": 0.8621391654014587, "logits/rejected": 0.7644095420837402, "logps/chosen": -2.9505438804626465, "logps/rejected": -4.4391303062438965, "loss": 0.7618, "nll_loss": 0.7098960280418396, "rewards/accuracies": 0.75, "rewards/chosen": -0.2950543761253357, "rewards/margins": 0.14885863661766052, "rewards/rejected": -0.4439130425453186, "step": 3456 }, { "epoch": 9.464750171115675, "grad_norm": 3.417501449584961, "learning_rate": 5.264383561643835e-07, "log_odds_chosen": 1.6764507293701172, "log_odds_ratio": -0.312493234872818, "logits/chosen": 0.8503804206848145, "logits/rejected": 0.8618215322494507, "logps/chosen": -2.2775659561157227, "logps/rejected": -3.863020896911621, "loss": 0.6003, "nll_loss": 0.5690351724624634, "rewards/accuracies": 1.0, "rewards/chosen": -0.2277565896511078, "rewards/margins": 0.15854547917842865, "rewards/rejected": -0.38630208373069763, "step": 3457 }, { "epoch": 9.467488021902806, "grad_norm": 3.3436434268951416, "learning_rate": 5.263013698630136e-07, "log_odds_chosen": 3.8593273162841797, "log_odds_ratio": -0.12118448317050934, "logits/chosen": 1.0619957447052002, "logits/rejected": 1.068865180015564, "logps/chosen": -2.022517204284668, "logps/rejected": -5.655620574951172, "loss": 0.6166, "nll_loss": 0.6044538021087646, "rewards/accuracies": 1.0, "rewards/chosen": -0.20225174725055695, "rewards/margins": 0.3633103370666504, "rewards/rejected": -0.5655620694160461, "step": 3458 }, { "epoch": 9.470225872689939, "grad_norm": 3.358429193496704, "learning_rate": 5.261643835616438e-07, "log_odds_chosen": 2.0021204948425293, "log_odds_ratio": -0.20383426547050476, "logits/chosen": 0.91504967212677, "logits/rejected": 0.9770042896270752, "logps/chosen": -1.3985533714294434, "logps/rejected": -3.148252010345459, "loss": 0.5379, "nll_loss": 0.5174767971038818, "rewards/accuracies": 1.0, "rewards/chosen": -0.13985535502433777, "rewards/margins": 0.1749698519706726, "rewards/rejected": -0.3148252069950104, "step": 3459 }, { "epoch": 9.47296372347707, "grad_norm": 3.4616363048553467, "learning_rate": 5.260273972602739e-07, "log_odds_chosen": 2.3130264282226562, "log_odds_ratio": -0.34046876430511475, "logits/chosen": 1.2340871095657349, "logits/rejected": 1.1530534029006958, "logps/chosen": -2.1961045265197754, "logps/rejected": -4.3348212242126465, "loss": 0.5891, "nll_loss": 0.555056095123291, "rewards/accuracies": 0.875, "rewards/chosen": -0.21961045265197754, "rewards/margins": 0.21387167274951935, "rewards/rejected": -0.4334821403026581, "step": 3460 }, { "epoch": 9.475701574264203, "grad_norm": 3.248703718185425, "learning_rate": 5.25890410958904e-07, "log_odds_chosen": 2.9778852462768555, "log_odds_ratio": -0.27878302335739136, "logits/chosen": 1.034752368927002, "logits/rejected": 0.9771690368652344, "logps/chosen": -1.6184073686599731, "logps/rejected": -4.408880233764648, "loss": 0.6728, "nll_loss": 0.6449651718139648, "rewards/accuracies": 1.0, "rewards/chosen": -0.1618407517671585, "rewards/margins": 0.2790472209453583, "rewards/rejected": -0.440887987613678, "step": 3461 }, { "epoch": 9.478439425051334, "grad_norm": 7.739789962768555, "learning_rate": 5.257534246575342e-07, "log_odds_chosen": 1.7122130393981934, "log_odds_ratio": -0.8163458704948425, "logits/chosen": 0.943367600440979, "logits/rejected": 0.9112104177474976, "logps/chosen": -2.621011734008789, "logps/rejected": -4.2353973388671875, "loss": 0.6702, "nll_loss": 0.5885207653045654, "rewards/accuracies": 0.75, "rewards/chosen": -0.2621011734008789, "rewards/margins": 0.16143861413002014, "rewards/rejected": -0.42353975772857666, "step": 3462 }, { "epoch": 9.481177275838467, "grad_norm": 3.2388663291931152, "learning_rate": 5.256164383561643e-07, "log_odds_chosen": 2.6246297359466553, "log_odds_ratio": -0.15810035169124603, "logits/chosen": 0.8639114499092102, "logits/rejected": 0.8838132619857788, "logps/chosen": -2.575357437133789, "logps/rejected": -5.096335411071777, "loss": 0.6912, "nll_loss": 0.6753426194190979, "rewards/accuracies": 1.0, "rewards/chosen": -0.2575357258319855, "rewards/margins": 0.25209784507751465, "rewards/rejected": -0.5096335411071777, "step": 3463 }, { "epoch": 9.483915126625599, "grad_norm": 3.3150858879089355, "learning_rate": 5.254794520547944e-07, "log_odds_chosen": 2.877736806869507, "log_odds_ratio": -0.15730781853199005, "logits/chosen": 1.0357613563537598, "logits/rejected": 1.0517035722732544, "logps/chosen": -1.9309587478637695, "logps/rejected": -4.669301509857178, "loss": 0.6545, "nll_loss": 0.6387538909912109, "rewards/accuracies": 1.0, "rewards/chosen": -0.19309589266777039, "rewards/margins": 0.2738342881202698, "rewards/rejected": -0.4669301509857178, "step": 3464 }, { "epoch": 9.486652977412732, "grad_norm": 5.000107765197754, "learning_rate": 5.253424657534246e-07, "log_odds_chosen": 2.5414083003997803, "log_odds_ratio": -0.21445557475090027, "logits/chosen": 0.9240173101425171, "logits/rejected": 0.9131916165351868, "logps/chosen": -1.828148365020752, "logps/rejected": -4.201169490814209, "loss": 0.7389, "nll_loss": 0.7174650430679321, "rewards/accuracies": 1.0, "rewards/chosen": -0.1828148365020752, "rewards/margins": 0.23730212450027466, "rewards/rejected": -0.42011693120002747, "step": 3465 }, { "epoch": 9.489390828199863, "grad_norm": 4.499795913696289, "learning_rate": 5.252054794520547e-07, "log_odds_chosen": 2.070643663406372, "log_odds_ratio": -0.3815253674983978, "logits/chosen": 0.7852768301963806, "logits/rejected": 0.7361247539520264, "logps/chosen": -2.2669084072113037, "logps/rejected": -4.175952911376953, "loss": 0.7172, "nll_loss": 0.6790826320648193, "rewards/accuracies": 0.875, "rewards/chosen": -0.2266908437013626, "rewards/margins": 0.1909044235944748, "rewards/rejected": -0.4175952970981598, "step": 3466 }, { "epoch": 9.492128678986996, "grad_norm": 3.423996925354004, "learning_rate": 5.25068493150685e-07, "log_odds_chosen": 3.5582354068756104, "log_odds_ratio": -0.2473539412021637, "logits/chosen": 1.0116305351257324, "logits/rejected": 0.9414539337158203, "logps/chosen": -1.486045002937317, "logps/rejected": -4.881289482116699, "loss": 0.5728, "nll_loss": 0.5480592846870422, "rewards/accuracies": 0.875, "rewards/chosen": -0.14860449731349945, "rewards/margins": 0.33952444791793823, "rewards/rejected": -0.4881289005279541, "step": 3467 }, { "epoch": 9.494866529774127, "grad_norm": 3.522979259490967, "learning_rate": 5.24931506849315e-07, "log_odds_chosen": 1.2685283422470093, "log_odds_ratio": -0.3446286916732788, "logits/chosen": 0.8188397288322449, "logits/rejected": 0.7411421537399292, "logps/chosen": -1.6756715774536133, "logps/rejected": -2.808007001876831, "loss": 0.6202, "nll_loss": 0.5857737064361572, "rewards/accuracies": 1.0, "rewards/chosen": -0.1675671637058258, "rewards/margins": 0.1132335513830185, "rewards/rejected": -0.2808007299900055, "step": 3468 }, { "epoch": 9.49760438056126, "grad_norm": 3.216543436050415, "learning_rate": 5.247945205479452e-07, "log_odds_chosen": 3.484642505645752, "log_odds_ratio": -0.31073087453842163, "logits/chosen": 1.0821752548217773, "logits/rejected": 1.0484898090362549, "logps/chosen": -1.6493558883666992, "logps/rejected": -5.006257057189941, "loss": 0.6422, "nll_loss": 0.6111636757850647, "rewards/accuracies": 0.875, "rewards/chosen": -0.16493558883666992, "rewards/margins": 0.33569011092185974, "rewards/rejected": -0.500625729560852, "step": 3469 }, { "epoch": 9.500342231348391, "grad_norm": 3.8354578018188477, "learning_rate": 5.246575342465754e-07, "log_odds_chosen": 2.244809627532959, "log_odds_ratio": -0.30884942412376404, "logits/chosen": 0.7500842809677124, "logits/rejected": 0.7866343855857849, "logps/chosen": -1.8646876811981201, "logps/rejected": -3.9747824668884277, "loss": 0.5922, "nll_loss": 0.561272382736206, "rewards/accuracies": 0.875, "rewards/chosen": -0.18646878004074097, "rewards/margins": 0.21100947260856628, "rewards/rejected": -0.39747822284698486, "step": 3470 }, { "epoch": 9.503080082135524, "grad_norm": 3.2535603046417236, "learning_rate": 5.245205479452055e-07, "log_odds_chosen": 2.4279022216796875, "log_odds_ratio": -0.18846195936203003, "logits/chosen": 1.0471384525299072, "logits/rejected": 1.0294400453567505, "logps/chosen": -1.9633456468582153, "logps/rejected": -4.258686065673828, "loss": 0.6361, "nll_loss": 0.6172938942909241, "rewards/accuracies": 1.0, "rewards/chosen": -0.196334570646286, "rewards/margins": 0.22953404486179352, "rewards/rejected": -0.42586860060691833, "step": 3471 }, { "epoch": 9.505817932922655, "grad_norm": 4.127066135406494, "learning_rate": 5.243835616438356e-07, "log_odds_chosen": 2.6899030208587646, "log_odds_ratio": -0.25890079140663147, "logits/chosen": 0.8330780267715454, "logits/rejected": 0.8649510145187378, "logps/chosen": -2.479635715484619, "logps/rejected": -5.068769454956055, "loss": 0.6963, "nll_loss": 0.6703774929046631, "rewards/accuracies": 1.0, "rewards/chosen": -0.24796359241008759, "rewards/margins": 0.25891339778900146, "rewards/rejected": -0.5068770051002502, "step": 3472 }, { "epoch": 9.508555783709788, "grad_norm": 3.1688997745513916, "learning_rate": 5.242465753424658e-07, "log_odds_chosen": 3.0323729515075684, "log_odds_ratio": -0.16807666420936584, "logits/chosen": 1.0454119443893433, "logits/rejected": 1.0575411319732666, "logps/chosen": -2.1018028259277344, "logps/rejected": -4.978742599487305, "loss": 0.7326, "nll_loss": 0.7157453894615173, "rewards/accuracies": 1.0, "rewards/chosen": -0.21018028259277344, "rewards/margins": 0.28769391775131226, "rewards/rejected": -0.49787425994873047, "step": 3473 }, { "epoch": 9.51129363449692, "grad_norm": 4.680027961730957, "learning_rate": 5.241095890410959e-07, "log_odds_chosen": 1.9590305089950562, "log_odds_ratio": -0.34195035696029663, "logits/chosen": 1.0647294521331787, "logits/rejected": 1.0348092317581177, "logps/chosen": -2.634316921234131, "logps/rejected": -4.333622932434082, "loss": 0.6145, "nll_loss": 0.5803271532058716, "rewards/accuracies": 0.875, "rewards/chosen": -0.26343169808387756, "rewards/margins": 0.1699305921792984, "rewards/rejected": -0.43336230516433716, "step": 3474 }, { "epoch": 9.514031485284052, "grad_norm": 3.220637083053589, "learning_rate": 5.23972602739726e-07, "log_odds_chosen": 3.730029344558716, "log_odds_ratio": -0.11286067217588425, "logits/chosen": 0.6464464664459229, "logits/rejected": 0.5898877382278442, "logps/chosen": -1.4908297061920166, "logps/rejected": -4.979511260986328, "loss": 0.5722, "nll_loss": 0.560934841632843, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490829735994339, "rewards/margins": 0.348868191242218, "rewards/rejected": -0.4979511499404907, "step": 3475 }, { "epoch": 9.516769336071183, "grad_norm": 8.41157341003418, "learning_rate": 5.238356164383562e-07, "log_odds_chosen": 1.4287244081497192, "log_odds_ratio": -0.3787277340888977, "logits/chosen": 0.6294946670532227, "logits/rejected": 0.5860028862953186, "logps/chosen": -2.3497586250305176, "logps/rejected": -3.456843376159668, "loss": 0.6885, "nll_loss": 0.6506137847900391, "rewards/accuracies": 0.875, "rewards/chosen": -0.23497585952281952, "rewards/margins": 0.11070848256349564, "rewards/rejected": -0.34568434953689575, "step": 3476 }, { "epoch": 9.519507186858316, "grad_norm": 3.80898380279541, "learning_rate": 5.236986301369863e-07, "log_odds_chosen": 2.986293315887451, "log_odds_ratio": -0.20904570817947388, "logits/chosen": 1.0002199411392212, "logits/rejected": 0.9788234233856201, "logps/chosen": -1.416524052619934, "logps/rejected": -4.121933460235596, "loss": 0.5101, "nll_loss": 0.48924118280410767, "rewards/accuracies": 0.875, "rewards/chosen": -0.1416524052619934, "rewards/margins": 0.2705409526824951, "rewards/rejected": -0.4121933579444885, "step": 3477 }, { "epoch": 9.522245037645447, "grad_norm": 3.8174965381622314, "learning_rate": 5.235616438356164e-07, "log_odds_chosen": 2.0153934955596924, "log_odds_ratio": -0.2935740053653717, "logits/chosen": 1.2744948863983154, "logits/rejected": 1.2960264682769775, "logps/chosen": -2.0340561866760254, "logps/rejected": -3.8719353675842285, "loss": 0.5316, "nll_loss": 0.5022624731063843, "rewards/accuracies": 0.75, "rewards/chosen": -0.20340561866760254, "rewards/margins": 0.18378791213035583, "rewards/rejected": -0.3871935307979584, "step": 3478 }, { "epoch": 9.52498288843258, "grad_norm": 3.301330804824829, "learning_rate": 5.234246575342466e-07, "log_odds_chosen": 1.8784929513931274, "log_odds_ratio": -0.3267306089401245, "logits/chosen": 1.013721227645874, "logits/rejected": 0.832797646522522, "logps/chosen": -2.460578680038452, "logps/rejected": -4.157994270324707, "loss": 0.6892, "nll_loss": 0.6565064191818237, "rewards/accuracies": 0.75, "rewards/chosen": -0.24605785310268402, "rewards/margins": 0.16974158585071564, "rewards/rejected": -0.41579943895339966, "step": 3479 }, { "epoch": 9.527720739219713, "grad_norm": 4.81679105758667, "learning_rate": 5.232876712328767e-07, "log_odds_chosen": 1.0871391296386719, "log_odds_ratio": -0.45576679706573486, "logits/chosen": 0.9729453325271606, "logits/rejected": 0.9016834497451782, "logps/chosen": -2.0626652240753174, "logps/rejected": -3.078406810760498, "loss": 0.693, "nll_loss": 0.6474611759185791, "rewards/accuracies": 0.875, "rewards/chosen": -0.20626652240753174, "rewards/margins": 0.10157416760921478, "rewards/rejected": -0.3078406751155853, "step": 3480 }, { "epoch": 9.530458590006845, "grad_norm": 5.861478328704834, "learning_rate": 5.231506849315069e-07, "log_odds_chosen": 2.124502182006836, "log_odds_ratio": -0.6221683025360107, "logits/chosen": 1.1559669971466064, "logits/rejected": 1.17686927318573, "logps/chosen": -2.814363956451416, "logps/rejected": -4.888777256011963, "loss": 0.6841, "nll_loss": 0.6218985319137573, "rewards/accuracies": 0.875, "rewards/chosen": -0.28143638372421265, "rewards/margins": 0.2074413299560547, "rewards/rejected": -0.48887771368026733, "step": 3481 }, { "epoch": 9.533196440793978, "grad_norm": 5.23038911819458, "learning_rate": 5.23013698630137e-07, "log_odds_chosen": 3.7702229022979736, "log_odds_ratio": -0.3402057886123657, "logits/chosen": 1.1214581727981567, "logits/rejected": 1.1058502197265625, "logps/chosen": -2.3760859966278076, "logps/rejected": -6.030235290527344, "loss": 0.7257, "nll_loss": 0.6917202472686768, "rewards/accuracies": 0.875, "rewards/chosen": -0.23760859668254852, "rewards/margins": 0.36541497707366943, "rewards/rejected": -0.6030235290527344, "step": 3482 }, { "epoch": 9.535934291581109, "grad_norm": 3.0450150966644287, "learning_rate": 5.228767123287671e-07, "log_odds_chosen": 3.3395323753356934, "log_odds_ratio": -0.1468208283185959, "logits/chosen": 0.9841158390045166, "logits/rejected": 0.9699306488037109, "logps/chosen": -2.330758571624756, "logps/rejected": -5.5135345458984375, "loss": 0.6268, "nll_loss": 0.6121426820755005, "rewards/accuracies": 1.0, "rewards/chosen": -0.23307585716247559, "rewards/margins": 0.31827759742736816, "rewards/rejected": -0.5513534545898438, "step": 3483 }, { "epoch": 9.538672142368242, "grad_norm": 3.6126585006713867, "learning_rate": 5.227397260273973e-07, "log_odds_chosen": 2.1327648162841797, "log_odds_ratio": -0.18837475776672363, "logits/chosen": 0.7329003810882568, "logits/rejected": 0.7236095070838928, "logps/chosen": -1.8109525442123413, "logps/rejected": -3.7564969062805176, "loss": 0.6543, "nll_loss": 0.6354328989982605, "rewards/accuracies": 1.0, "rewards/chosen": -0.18109524250030518, "rewards/margins": 0.19455444812774658, "rewards/rejected": -0.37564969062805176, "step": 3484 }, { "epoch": 9.541409993155373, "grad_norm": 3.474698066711426, "learning_rate": 5.226027397260274e-07, "log_odds_chosen": 2.1503989696502686, "log_odds_ratio": -0.29550307989120483, "logits/chosen": 0.7710373997688293, "logits/rejected": 0.7876952886581421, "logps/chosen": -1.6343940496444702, "logps/rejected": -3.542937755584717, "loss": 0.6149, "nll_loss": 0.5853007435798645, "rewards/accuracies": 0.875, "rewards/chosen": -0.16343942284584045, "rewards/margins": 0.19085440039634705, "rewards/rejected": -0.3542938232421875, "step": 3485 }, { "epoch": 9.544147843942506, "grad_norm": 3.101418972015381, "learning_rate": 5.224657534246575e-07, "log_odds_chosen": 4.769133567810059, "log_odds_ratio": -0.09681699424982071, "logits/chosen": 1.1622588634490967, "logits/rejected": 1.1731038093566895, "logps/chosen": -1.7385004758834839, "logps/rejected": -6.217988014221191, "loss": 0.6154, "nll_loss": 0.6056917309761047, "rewards/accuracies": 1.0, "rewards/chosen": -0.17385005950927734, "rewards/margins": 0.4479488134384155, "rewards/rejected": -0.6217988729476929, "step": 3486 }, { "epoch": 9.546885694729637, "grad_norm": 5.689078330993652, "learning_rate": 5.223287671232877e-07, "log_odds_chosen": 2.1101572513580322, "log_odds_ratio": -0.21636179089546204, "logits/chosen": 0.8645855188369751, "logits/rejected": 0.8629339933395386, "logps/chosen": -2.3737058639526367, "logps/rejected": -4.18154239654541, "loss": 0.6768, "nll_loss": 0.6551676988601685, "rewards/accuracies": 0.875, "rewards/chosen": -0.2373705804347992, "rewards/margins": 0.18078364431858063, "rewards/rejected": -0.418154239654541, "step": 3487 }, { "epoch": 9.54962354551677, "grad_norm": 4.196039199829102, "learning_rate": 5.221917808219179e-07, "log_odds_chosen": 2.2594151496887207, "log_odds_ratio": -0.239304780960083, "logits/chosen": 0.814609706401825, "logits/rejected": 0.7560944557189941, "logps/chosen": -2.9035017490386963, "logps/rejected": -5.064174652099609, "loss": 0.7007, "nll_loss": 0.6767694354057312, "rewards/accuracies": 1.0, "rewards/chosen": -0.29035019874572754, "rewards/margins": 0.21606725454330444, "rewards/rejected": -0.5064173936843872, "step": 3488 }, { "epoch": 9.552361396303901, "grad_norm": 3.5774006843566895, "learning_rate": 5.220547945205479e-07, "log_odds_chosen": 1.626448154449463, "log_odds_ratio": -0.3823125958442688, "logits/chosen": 0.9265628457069397, "logits/rejected": 0.7805463075637817, "logps/chosen": -1.3722140789031982, "logps/rejected": -2.800389528274536, "loss": 0.646, "nll_loss": 0.6077358722686768, "rewards/accuracies": 0.75, "rewards/chosen": -0.13722142577171326, "rewards/margins": 0.14281755685806274, "rewards/rejected": -0.280038982629776, "step": 3489 }, { "epoch": 9.555099247091034, "grad_norm": 3.7899575233459473, "learning_rate": 5.219178082191781e-07, "log_odds_chosen": 2.226165533065796, "log_odds_ratio": -0.20592176914215088, "logits/chosen": 0.7612301707267761, "logits/rejected": 0.6886098980903625, "logps/chosen": -1.3187922239303589, "logps/rejected": -3.2760133743286133, "loss": 0.5807, "nll_loss": 0.5600888729095459, "rewards/accuracies": 1.0, "rewards/chosen": -0.13187922537326813, "rewards/margins": 0.19572214782238007, "rewards/rejected": -0.3276013731956482, "step": 3490 }, { "epoch": 9.557837097878165, "grad_norm": 3.0482089519500732, "learning_rate": 5.217808219178082e-07, "log_odds_chosen": 4.158812046051025, "log_odds_ratio": -0.11021442711353302, "logits/chosen": 0.9517123103141785, "logits/rejected": 0.9622334837913513, "logps/chosen": -1.8688024282455444, "logps/rejected": -5.839038848876953, "loss": 0.6503, "nll_loss": 0.6392680406570435, "rewards/accuracies": 1.0, "rewards/chosen": -0.18688024580478668, "rewards/margins": 0.39702367782592773, "rewards/rejected": -0.5839039087295532, "step": 3491 }, { "epoch": 9.560574948665298, "grad_norm": 5.254090309143066, "learning_rate": 5.216438356164383e-07, "log_odds_chosen": 3.3160996437072754, "log_odds_ratio": -0.5434541702270508, "logits/chosen": 0.9657472968101501, "logits/rejected": 1.020491361618042, "logps/chosen": -2.4763617515563965, "logps/rejected": -5.69583797454834, "loss": 0.746, "nll_loss": 0.6916595697402954, "rewards/accuracies": 0.875, "rewards/chosen": -0.24763616919517517, "rewards/margins": 0.3219476044178009, "rewards/rejected": -0.5695837736129761, "step": 3492 }, { "epoch": 9.56331279945243, "grad_norm": 3.410884141921997, "learning_rate": 5.215068493150685e-07, "log_odds_chosen": 2.1263599395751953, "log_odds_ratio": -0.40042585134506226, "logits/chosen": 0.8094326853752136, "logits/rejected": 0.7902969121932983, "logps/chosen": -1.9720895290374756, "logps/rejected": -4.030135154724121, "loss": 0.6908, "nll_loss": 0.6507296562194824, "rewards/accuracies": 0.875, "rewards/chosen": -0.1972089409828186, "rewards/margins": 0.20580458641052246, "rewards/rejected": -0.40301355719566345, "step": 3493 }, { "epoch": 9.566050650239562, "grad_norm": 6.023321628570557, "learning_rate": 5.213698630136986e-07, "log_odds_chosen": 2.6591973304748535, "log_odds_ratio": -0.23491114377975464, "logits/chosen": 0.9768427610397339, "logits/rejected": 0.9292517304420471, "logps/chosen": -2.3801827430725098, "logps/rejected": -4.944308280944824, "loss": 0.6523, "nll_loss": 0.6288114786148071, "rewards/accuracies": 1.0, "rewards/chosen": -0.23801827430725098, "rewards/margins": 0.2564125955104828, "rewards/rejected": -0.49443086981773376, "step": 3494 }, { "epoch": 9.568788501026694, "grad_norm": 3.254119396209717, "learning_rate": 5.212328767123288e-07, "log_odds_chosen": 2.0407614707946777, "log_odds_ratio": -0.24486801028251648, "logits/chosen": 0.9936804175376892, "logits/rejected": 0.9692409634590149, "logps/chosen": -1.9131876230239868, "logps/rejected": -3.718658447265625, "loss": 0.5768, "nll_loss": 0.5523581504821777, "rewards/accuracies": 1.0, "rewards/chosen": -0.19131876528263092, "rewards/margins": 0.1805471032857895, "rewards/rejected": -0.3718658685684204, "step": 3495 }, { "epoch": 9.571526351813826, "grad_norm": 3.4409637451171875, "learning_rate": 5.210958904109589e-07, "log_odds_chosen": 1.8659069538116455, "log_odds_ratio": -0.2778010070323944, "logits/chosen": 0.6504483819007874, "logits/rejected": 0.6582478880882263, "logps/chosen": -2.3198416233062744, "logps/rejected": -4.075246810913086, "loss": 0.7576, "nll_loss": 0.7298576235771179, "rewards/accuracies": 0.875, "rewards/chosen": -0.23198416829109192, "rewards/margins": 0.1755404770374298, "rewards/rejected": -0.40752464532852173, "step": 3496 }, { "epoch": 9.574264202600958, "grad_norm": 3.0981242656707764, "learning_rate": 5.20958904109589e-07, "log_odds_chosen": 2.8580827713012695, "log_odds_ratio": -0.1713130623102188, "logits/chosen": 0.6569876670837402, "logits/rejected": 0.6067386269569397, "logps/chosen": -1.3612183332443237, "logps/rejected": -3.9861860275268555, "loss": 0.5636, "nll_loss": 0.5465072393417358, "rewards/accuracies": 1.0, "rewards/chosen": -0.13612182438373566, "rewards/margins": 0.2624967694282532, "rewards/rejected": -0.39861860871315, "step": 3497 }, { "epoch": 9.57700205338809, "grad_norm": 6.499897480010986, "learning_rate": 5.208219178082192e-07, "log_odds_chosen": 1.0352901220321655, "log_odds_ratio": -0.44526052474975586, "logits/chosen": 0.7868974208831787, "logits/rejected": 0.7234400510787964, "logps/chosen": -2.374143123626709, "logps/rejected": -3.3147177696228027, "loss": 0.6843, "nll_loss": 0.6398172974586487, "rewards/accuracies": 0.75, "rewards/chosen": -0.23741431534290314, "rewards/margins": 0.09405747056007385, "rewards/rejected": -0.3314717411994934, "step": 3498 }, { "epoch": 9.579739904175222, "grad_norm": 4.075689792633057, "learning_rate": 5.206849315068493e-07, "log_odds_chosen": 2.9013781547546387, "log_odds_ratio": -0.19143803417682648, "logits/chosen": 1.2094942331314087, "logits/rejected": 1.2440828084945679, "logps/chosen": -1.9874639511108398, "logps/rejected": -4.729144096374512, "loss": 0.6892, "nll_loss": 0.6700640916824341, "rewards/accuracies": 0.875, "rewards/chosen": -0.19874638319015503, "rewards/margins": 0.2741680443286896, "rewards/rejected": -0.472914457321167, "step": 3499 }, { "epoch": 9.582477754962355, "grad_norm": 3.0133936405181885, "learning_rate": 5.205479452054794e-07, "log_odds_chosen": 3.6399245262145996, "log_odds_ratio": -0.15554381906986237, "logits/chosen": 0.9414907693862915, "logits/rejected": 0.958773672580719, "logps/chosen": -1.2849494218826294, "logps/rejected": -4.623892784118652, "loss": 0.6047, "nll_loss": 0.5891759991645813, "rewards/accuracies": 1.0, "rewards/chosen": -0.12849494814872742, "rewards/margins": 0.3338943123817444, "rewards/rejected": -0.4623892903327942, "step": 3500 }, { "epoch": 9.585215605749486, "grad_norm": 3.529768705368042, "learning_rate": 5.204109589041096e-07, "log_odds_chosen": 2.0651438236236572, "log_odds_ratio": -0.2648063600063324, "logits/chosen": 0.7811195850372314, "logits/rejected": 0.7129383087158203, "logps/chosen": -2.534601926803589, "logps/rejected": -4.486556053161621, "loss": 0.6301, "nll_loss": 0.6036403179168701, "rewards/accuracies": 1.0, "rewards/chosen": -0.25346019864082336, "rewards/margins": 0.19519542157649994, "rewards/rejected": -0.4486556053161621, "step": 3501 }, { "epoch": 9.587953456536619, "grad_norm": 6.7233452796936035, "learning_rate": 5.202739726027398e-07, "log_odds_chosen": 0.5937548875808716, "log_odds_ratio": -0.9473627209663391, "logits/chosen": 0.9627472162246704, "logits/rejected": 0.9016039967536926, "logps/chosen": -2.283939838409424, "logps/rejected": -2.7506215572357178, "loss": 0.7167, "nll_loss": 0.6219285726547241, "rewards/accuracies": 0.875, "rewards/chosen": -0.22839398682117462, "rewards/margins": 0.046668171882629395, "rewards/rejected": -0.2750621736049652, "step": 3502 }, { "epoch": 9.59069130732375, "grad_norm": 5.201239585876465, "learning_rate": 5.201369863013698e-07, "log_odds_chosen": 2.2190651893615723, "log_odds_ratio": -0.2887941896915436, "logits/chosen": 0.8667012453079224, "logits/rejected": 0.8384426832199097, "logps/chosen": -1.2970325946807861, "logps/rejected": -3.188377857208252, "loss": 0.595, "nll_loss": 0.5661570429801941, "rewards/accuracies": 1.0, "rewards/chosen": -0.12970326840877533, "rewards/margins": 0.18913452327251434, "rewards/rejected": -0.3188377916812897, "step": 3503 }, { "epoch": 9.593429158110883, "grad_norm": 4.197296142578125, "learning_rate": 5.2e-07, "log_odds_chosen": 2.2472915649414062, "log_odds_ratio": -0.5222617983818054, "logits/chosen": 1.0747363567352295, "logits/rejected": 1.0877946615219116, "logps/chosen": -2.397127866744995, "logps/rejected": -4.538658142089844, "loss": 0.6479, "nll_loss": 0.5956891775131226, "rewards/accuracies": 0.875, "rewards/chosen": -0.23971278965473175, "rewards/margins": 0.21415302157402039, "rewards/rejected": -0.45386582612991333, "step": 3504 }, { "epoch": 9.596167008898014, "grad_norm": 3.6961982250213623, "learning_rate": 5.198630136986301e-07, "log_odds_chosen": 3.4564852714538574, "log_odds_ratio": -0.28370118141174316, "logits/chosen": 0.967758297920227, "logits/rejected": 0.9565670490264893, "logps/chosen": -1.8404271602630615, "logps/rejected": -5.115230560302734, "loss": 0.6459, "nll_loss": 0.6174803376197815, "rewards/accuracies": 0.875, "rewards/chosen": -0.18404272198677063, "rewards/margins": 0.3274803161621094, "rewards/rejected": -0.5115230679512024, "step": 3505 }, { "epoch": 9.598904859685147, "grad_norm": 3.4795167446136475, "learning_rate": 5.197260273972602e-07, "log_odds_chosen": 1.9456195831298828, "log_odds_ratio": -0.2411152422428131, "logits/chosen": 0.7634958624839783, "logits/rejected": 0.6830239295959473, "logps/chosen": -2.7367939949035645, "logps/rejected": -4.523974895477295, "loss": 0.7136, "nll_loss": 0.6894931793212891, "rewards/accuracies": 0.875, "rewards/chosen": -0.2736794054508209, "rewards/margins": 0.17871809005737305, "rewards/rejected": -0.45239749550819397, "step": 3506 }, { "epoch": 9.60164271047228, "grad_norm": 3.770214557647705, "learning_rate": 5.195890410958904e-07, "log_odds_chosen": 1.5960257053375244, "log_odds_ratio": -0.3203780949115753, "logits/chosen": 0.759269654750824, "logits/rejected": 0.7896062135696411, "logps/chosen": -1.8911511898040771, "logps/rejected": -3.3684182167053223, "loss": 0.5956, "nll_loss": 0.5636088848114014, "rewards/accuracies": 0.875, "rewards/chosen": -0.18911512196063995, "rewards/margins": 0.14772671461105347, "rewards/rejected": -0.3368418216705322, "step": 3507 }, { "epoch": 9.604380561259411, "grad_norm": 3.2637884616851807, "learning_rate": 5.194520547945205e-07, "log_odds_chosen": 4.409182548522949, "log_odds_ratio": -0.06718891113996506, "logits/chosen": 1.0510097742080688, "logits/rejected": 1.0525883436203003, "logps/chosen": -2.0162434577941895, "logps/rejected": -6.202312469482422, "loss": 0.525, "nll_loss": 0.5183174014091492, "rewards/accuracies": 1.0, "rewards/chosen": -0.20162436366081238, "rewards/margins": 0.41860684752464294, "rewards/rejected": -0.6202312111854553, "step": 3508 }, { "epoch": 9.607118412046544, "grad_norm": 3.211108922958374, "learning_rate": 5.193150684931507e-07, "log_odds_chosen": 3.9843194484710693, "log_odds_ratio": -0.03326160088181496, "logits/chosen": 1.3222682476043701, "logits/rejected": 1.3863415718078613, "logps/chosen": -2.0984530448913574, "logps/rejected": -5.857767105102539, "loss": 0.6325, "nll_loss": 0.6291580200195312, "rewards/accuracies": 1.0, "rewards/chosen": -0.20984531939029694, "rewards/margins": 0.37593144178390503, "rewards/rejected": -0.5857767462730408, "step": 3509 }, { "epoch": 9.609856262833675, "grad_norm": 4.738780498504639, "learning_rate": 5.191780821917808e-07, "log_odds_chosen": 0.7281937003135681, "log_odds_ratio": -0.5712636709213257, "logits/chosen": 0.8500180244445801, "logits/rejected": 0.7832854986190796, "logps/chosen": -2.5264506340026855, "logps/rejected": -3.179476737976074, "loss": 0.6272, "nll_loss": 0.5700933933258057, "rewards/accuracies": 0.625, "rewards/chosen": -0.2526450455188751, "rewards/margins": 0.06530261784791946, "rewards/rejected": -0.3179476857185364, "step": 3510 }, { "epoch": 9.612594113620808, "grad_norm": 3.785768508911133, "learning_rate": 5.190410958904109e-07, "log_odds_chosen": 2.017286539077759, "log_odds_ratio": -0.2836930751800537, "logits/chosen": 0.674793541431427, "logits/rejected": 0.5728964805603027, "logps/chosen": -2.1256661415100098, "logps/rejected": -3.9613142013549805, "loss": 0.6175, "nll_loss": 0.5891236066818237, "rewards/accuracies": 0.875, "rewards/chosen": -0.21256659924983978, "rewards/margins": 0.18356485664844513, "rewards/rejected": -0.3961314558982849, "step": 3511 }, { "epoch": 9.61533196440794, "grad_norm": 2.974527597427368, "learning_rate": 5.189041095890411e-07, "log_odds_chosen": 3.2860169410705566, "log_odds_ratio": -0.10678276419639587, "logits/chosen": 0.9376866817474365, "logits/rejected": 0.9164724946022034, "logps/chosen": -2.013434886932373, "logps/rejected": -5.156429290771484, "loss": 0.5734, "nll_loss": 0.5626764297485352, "rewards/accuracies": 1.0, "rewards/chosen": -0.20134347677230835, "rewards/margins": 0.31429946422576904, "rewards/rejected": -0.5156430006027222, "step": 3512 }, { "epoch": 9.618069815195073, "grad_norm": 3.064206123352051, "learning_rate": 5.187671232876712e-07, "log_odds_chosen": 3.1624815464019775, "log_odds_ratio": -0.12677928805351257, "logits/chosen": 0.8513049483299255, "logits/rejected": 0.8199945688247681, "logps/chosen": -1.9176677465438843, "logps/rejected": -4.84415340423584, "loss": 0.6029, "nll_loss": 0.5901727080345154, "rewards/accuracies": 1.0, "rewards/chosen": -0.19176678359508514, "rewards/margins": 0.292648583650589, "rewards/rejected": -0.48441535234451294, "step": 3513 }, { "epoch": 9.620807665982204, "grad_norm": 3.9528214931488037, "learning_rate": 5.186301369863013e-07, "log_odds_chosen": 2.9766452312469482, "log_odds_ratio": -0.13735604286193848, "logits/chosen": 0.6965931057929993, "logits/rejected": 0.6346185803413391, "logps/chosen": -1.838283658027649, "logps/rejected": -4.523379325866699, "loss": 0.7019, "nll_loss": 0.6881638765335083, "rewards/accuracies": 1.0, "rewards/chosen": -0.18382835388183594, "rewards/margins": 0.26850956678390503, "rewards/rejected": -0.45233792066574097, "step": 3514 }, { "epoch": 9.623545516769337, "grad_norm": 4.01852560043335, "learning_rate": 5.184931506849315e-07, "log_odds_chosen": 1.6748089790344238, "log_odds_ratio": -0.2309066355228424, "logits/chosen": 0.9197671413421631, "logits/rejected": 0.9018824100494385, "logps/chosen": -2.2296125888824463, "logps/rejected": -3.77584171295166, "loss": 0.5678, "nll_loss": 0.5447419881820679, "rewards/accuracies": 1.0, "rewards/chosen": -0.22296124696731567, "rewards/margins": 0.1546229124069214, "rewards/rejected": -0.37758415937423706, "step": 3515 }, { "epoch": 9.626283367556468, "grad_norm": 3.170762538909912, "learning_rate": 5.183561643835617e-07, "log_odds_chosen": 4.05308723449707, "log_odds_ratio": -0.04574527218937874, "logits/chosen": 1.0914299488067627, "logits/rejected": 1.1144278049468994, "logps/chosen": -2.2429215908050537, "logps/rejected": -6.127415180206299, "loss": 0.6818, "nll_loss": 0.6772024631500244, "rewards/accuracies": 1.0, "rewards/chosen": -0.22429215908050537, "rewards/margins": 0.38844937086105347, "rewards/rejected": -0.6127415299415588, "step": 3516 }, { "epoch": 9.6290212183436, "grad_norm": 3.5584685802459717, "learning_rate": 5.182191780821917e-07, "log_odds_chosen": 1.8391602039337158, "log_odds_ratio": -0.42152437567710876, "logits/chosen": 1.1582590341567993, "logits/rejected": 1.1878975629806519, "logps/chosen": -2.0663740634918213, "logps/rejected": -3.833749294281006, "loss": 0.5959, "nll_loss": 0.5536981821060181, "rewards/accuracies": 0.75, "rewards/chosen": -0.2066374123096466, "rewards/margins": 0.17673750221729279, "rewards/rejected": -0.3833749294281006, "step": 3517 }, { "epoch": 9.631759069130732, "grad_norm": 4.156022071838379, "learning_rate": 5.180821917808219e-07, "log_odds_chosen": 1.4167118072509766, "log_odds_ratio": -0.3853365182876587, "logits/chosen": 0.7773033976554871, "logits/rejected": 0.7526288032531738, "logps/chosen": -2.096923351287842, "logps/rejected": -3.4423155784606934, "loss": 0.6526, "nll_loss": 0.6140402555465698, "rewards/accuracies": 0.75, "rewards/chosen": -0.2096923291683197, "rewards/margins": 0.13453920185565948, "rewards/rejected": -0.344231516122818, "step": 3518 }, { "epoch": 9.634496919917865, "grad_norm": 3.642827272415161, "learning_rate": 5.179452054794521e-07, "log_odds_chosen": 1.4549448490142822, "log_odds_ratio": -0.31741878390312195, "logits/chosen": 0.8205786347389221, "logits/rejected": 0.8718726634979248, "logps/chosen": -1.8458058834075928, "logps/rejected": -3.177184581756592, "loss": 0.6718, "nll_loss": 0.6400318145751953, "rewards/accuracies": 0.875, "rewards/chosen": -0.18458059430122375, "rewards/margins": 0.13313785195350647, "rewards/rejected": -0.3177184462547302, "step": 3519 }, { "epoch": 9.637234770704996, "grad_norm": 5.124238967895508, "learning_rate": 5.178082191780821e-07, "log_odds_chosen": 1.1123950481414795, "log_odds_ratio": -0.45951199531555176, "logits/chosen": 1.0062828063964844, "logits/rejected": 1.00105881690979, "logps/chosen": -2.2569167613983154, "logps/rejected": -3.258805274963379, "loss": 0.5862, "nll_loss": 0.5402709245681763, "rewards/accuracies": 0.75, "rewards/chosen": -0.22569167613983154, "rewards/margins": 0.10018886625766754, "rewards/rejected": -0.3258805274963379, "step": 3520 }, { "epoch": 9.639972621492129, "grad_norm": 3.7721052169799805, "learning_rate": 5.176712328767123e-07, "log_odds_chosen": 4.160490036010742, "log_odds_ratio": -0.3080134689807892, "logits/chosen": 0.8611779808998108, "logits/rejected": 0.8011783361434937, "logps/chosen": -1.7680593729019165, "logps/rejected": -5.783768177032471, "loss": 0.6915, "nll_loss": 0.6607217192649841, "rewards/accuracies": 0.875, "rewards/chosen": -0.17680594325065613, "rewards/margins": 0.4015708863735199, "rewards/rejected": -0.578376829624176, "step": 3521 }, { "epoch": 9.64271047227926, "grad_norm": 5.9342451095581055, "learning_rate": 5.175342465753424e-07, "log_odds_chosen": 2.6063225269317627, "log_odds_ratio": -0.39624547958374023, "logits/chosen": 0.9072157144546509, "logits/rejected": 0.8978782892227173, "logps/chosen": -2.257866382598877, "logps/rejected": -4.708380699157715, "loss": 0.5711, "nll_loss": 0.531470537185669, "rewards/accuracies": 0.875, "rewards/chosen": -0.22578665614128113, "rewards/margins": 0.24505141377449036, "rewards/rejected": -0.4708380699157715, "step": 3522 }, { "epoch": 9.645448323066393, "grad_norm": 4.4856085777282715, "learning_rate": 5.173972602739725e-07, "log_odds_chosen": 1.8479015827178955, "log_odds_ratio": -0.31171542406082153, "logits/chosen": 1.0338306427001953, "logits/rejected": 1.0745760202407837, "logps/chosen": -1.8961519002914429, "logps/rejected": -3.5541529655456543, "loss": 0.5584, "nll_loss": 0.527265727519989, "rewards/accuracies": 0.875, "rewards/chosen": -0.1896151602268219, "rewards/margins": 0.1658000946044922, "rewards/rejected": -0.3554152548313141, "step": 3523 }, { "epoch": 9.648186173853524, "grad_norm": 3.7490756511688232, "learning_rate": 5.172602739726027e-07, "log_odds_chosen": 1.879913330078125, "log_odds_ratio": -0.18790577352046967, "logits/chosen": 0.7797943353652954, "logits/rejected": 0.7348464727401733, "logps/chosen": -1.5745930671691895, "logps/rejected": -3.2332634925842285, "loss": 0.701, "nll_loss": 0.6822494268417358, "rewards/accuracies": 1.0, "rewards/chosen": -0.1574593037366867, "rewards/margins": 0.16586707532405853, "rewards/rejected": -0.32332637906074524, "step": 3524 }, { "epoch": 9.650924024640657, "grad_norm": 6.551977634429932, "learning_rate": 5.171232876712328e-07, "log_odds_chosen": 0.8667446374893188, "log_odds_ratio": -0.7331186532974243, "logits/chosen": 1.0869451761245728, "logits/rejected": 1.0864388942718506, "logps/chosen": -3.1060984134674072, "logps/rejected": -3.9476630687713623, "loss": 0.6614, "nll_loss": 0.5880805253982544, "rewards/accuracies": 0.75, "rewards/chosen": -0.3106098175048828, "rewards/margins": 0.08415648341178894, "rewards/rejected": -0.39476630091667175, "step": 3525 }, { "epoch": 9.653661875427789, "grad_norm": 4.093384742736816, "learning_rate": 5.16986301369863e-07, "log_odds_chosen": 1.3948525190353394, "log_odds_ratio": -0.3668922781944275, "logits/chosen": 1.1019333600997925, "logits/rejected": 1.145838975906372, "logps/chosen": -2.512986660003662, "logps/rejected": -3.7900264263153076, "loss": 0.5764, "nll_loss": 0.5397544503211975, "rewards/accuracies": 0.875, "rewards/chosen": -0.2512986660003662, "rewards/margins": 0.1277039647102356, "rewards/rejected": -0.3790026307106018, "step": 3526 }, { "epoch": 9.656399726214921, "grad_norm": 3.2793452739715576, "learning_rate": 5.168493150684931e-07, "log_odds_chosen": 2.7473526000976562, "log_odds_ratio": -0.10931585729122162, "logits/chosen": 1.0647026300430298, "logits/rejected": 1.063039779663086, "logps/chosen": -1.462043046951294, "logps/rejected": -3.9436838626861572, "loss": 0.5733, "nll_loss": 0.5623908042907715, "rewards/accuracies": 1.0, "rewards/chosen": -0.14620430767536163, "rewards/margins": 0.2481640875339508, "rewards/rejected": -0.39436841011047363, "step": 3527 }, { "epoch": 9.659137577002053, "grad_norm": 5.417993545532227, "learning_rate": 5.167123287671232e-07, "log_odds_chosen": 1.5693957805633545, "log_odds_ratio": -0.4377249479293823, "logits/chosen": 0.8704279661178589, "logits/rejected": 0.8521531820297241, "logps/chosen": -2.0071732997894287, "logps/rejected": -3.3581690788269043, "loss": 0.6437, "nll_loss": 0.5998976230621338, "rewards/accuracies": 0.75, "rewards/chosen": -0.20071735978126526, "rewards/margins": 0.13509956002235413, "rewards/rejected": -0.3358169198036194, "step": 3528 }, { "epoch": 9.661875427789186, "grad_norm": 3.1649162769317627, "learning_rate": 5.165753424657534e-07, "log_odds_chosen": 6.315584182739258, "log_odds_ratio": -0.0944937989115715, "logits/chosen": 1.0687483549118042, "logits/rejected": 1.1174335479736328, "logps/chosen": -1.6303441524505615, "logps/rejected": -7.666268348693848, "loss": 0.679, "nll_loss": 0.6695326566696167, "rewards/accuracies": 1.0, "rewards/chosen": -0.16303442418575287, "rewards/margins": 0.6035923957824707, "rewards/rejected": -0.7666268348693848, "step": 3529 }, { "epoch": 9.664613278576317, "grad_norm": 5.126276969909668, "learning_rate": 5.164383561643836e-07, "log_odds_chosen": 0.7441249489784241, "log_odds_ratio": -0.6889017224311829, "logits/chosen": 0.8987610340118408, "logits/rejected": 0.9075300693511963, "logps/chosen": -2.239900588989258, "logps/rejected": -2.8943614959716797, "loss": 0.6456, "nll_loss": 0.5766650438308716, "rewards/accuracies": 0.875, "rewards/chosen": -0.2239900827407837, "rewards/margins": 0.06544607877731323, "rewards/rejected": -0.28943613171577454, "step": 3530 }, { "epoch": 9.66735112936345, "grad_norm": 3.985840320587158, "learning_rate": 5.163013698630136e-07, "log_odds_chosen": 2.2667741775512695, "log_odds_ratio": -0.33832690119743347, "logits/chosen": 0.9317877292633057, "logits/rejected": 0.9208637475967407, "logps/chosen": -1.936964511871338, "logps/rejected": -4.038137912750244, "loss": 0.6227, "nll_loss": 0.5888334512710571, "rewards/accuracies": 0.875, "rewards/chosen": -0.19369643926620483, "rewards/margins": 0.210117369890213, "rewards/rejected": -0.40381383895874023, "step": 3531 }, { "epoch": 9.670088980150581, "grad_norm": 7.02525520324707, "learning_rate": 5.161643835616438e-07, "log_odds_chosen": 2.8170547485351562, "log_odds_ratio": -0.29238376021385193, "logits/chosen": 1.0793958902359009, "logits/rejected": 1.0562585592269897, "logps/chosen": -2.328498125076294, "logps/rejected": -4.950400352478027, "loss": 0.736, "nll_loss": 0.706757664680481, "rewards/accuracies": 0.875, "rewards/chosen": -0.2328498363494873, "rewards/margins": 0.2621902525424957, "rewards/rejected": -0.49504005908966064, "step": 3532 }, { "epoch": 9.672826830937714, "grad_norm": 3.763256311416626, "learning_rate": 5.16027397260274e-07, "log_odds_chosen": 1.0521763563156128, "log_odds_ratio": -0.32663828134536743, "logits/chosen": 1.023579716682434, "logits/rejected": 1.0088814496994019, "logps/chosen": -1.988569736480713, "logps/rejected": -2.9315123558044434, "loss": 0.5729, "nll_loss": 0.5402745008468628, "rewards/accuracies": 1.0, "rewards/chosen": -0.19885697960853577, "rewards/margins": 0.0942942351102829, "rewards/rejected": -0.29315119981765747, "step": 3533 }, { "epoch": 9.675564681724847, "grad_norm": 2.9209086894989014, "learning_rate": 5.15890410958904e-07, "log_odds_chosen": 3.446288585662842, "log_odds_ratio": -0.1422368288040161, "logits/chosen": 1.0227890014648438, "logits/rejected": 1.0387020111083984, "logps/chosen": -2.13010311126709, "logps/rejected": -5.412224292755127, "loss": 0.6703, "nll_loss": 0.6560778021812439, "rewards/accuracies": 1.0, "rewards/chosen": -0.21301032602787018, "rewards/margins": 0.32821208238601685, "rewards/rejected": -0.5412224531173706, "step": 3534 }, { "epoch": 9.678302532511978, "grad_norm": 3.074531316757202, "learning_rate": 5.157534246575342e-07, "log_odds_chosen": 3.519681215286255, "log_odds_ratio": -0.1945149153470993, "logits/chosen": 0.9713357090950012, "logits/rejected": 1.0211132764816284, "logps/chosen": -1.563537836074829, "logps/rejected": -4.863492012023926, "loss": 0.5764, "nll_loss": 0.5569618940353394, "rewards/accuracies": 0.875, "rewards/chosen": -0.15635380148887634, "rewards/margins": 0.32999542355537415, "rewards/rejected": -0.4863492250442505, "step": 3535 }, { "epoch": 9.681040383299111, "grad_norm": 7.518016338348389, "learning_rate": 5.156164383561643e-07, "log_odds_chosen": 3.357729911804199, "log_odds_ratio": -0.2974203824996948, "logits/chosen": 1.243788480758667, "logits/rejected": 1.2180589437484741, "logps/chosen": -3.056032657623291, "logps/rejected": -6.288843154907227, "loss": 0.6845, "nll_loss": 0.6547106504440308, "rewards/accuracies": 0.875, "rewards/chosen": -0.3056032657623291, "rewards/margins": 0.32328107953071594, "rewards/rejected": -0.6288843750953674, "step": 3536 }, { "epoch": 9.683778234086242, "grad_norm": 3.667269706726074, "learning_rate": 5.154794520547944e-07, "log_odds_chosen": 1.5427809953689575, "log_odds_ratio": -0.28698790073394775, "logits/chosen": 0.8656724691390991, "logits/rejected": 0.8681687116622925, "logps/chosen": -1.9119515419006348, "logps/rejected": -3.345304012298584, "loss": 0.5908, "nll_loss": 0.5620854496955872, "rewards/accuracies": 1.0, "rewards/chosen": -0.19119516015052795, "rewards/margins": 0.1433352380990982, "rewards/rejected": -0.33453041315078735, "step": 3537 }, { "epoch": 9.686516084873375, "grad_norm": 5.585895538330078, "learning_rate": 5.153424657534246e-07, "log_odds_chosen": 1.5875577926635742, "log_odds_ratio": -0.4302780330181122, "logits/chosen": 1.0365439653396606, "logits/rejected": 0.9631975293159485, "logps/chosen": -1.975760579109192, "logps/rejected": -3.3643927574157715, "loss": 0.6349, "nll_loss": 0.5918466448783875, "rewards/accuracies": 0.75, "rewards/chosen": -0.19757606089115143, "rewards/margins": 0.138863205909729, "rewards/rejected": -0.3364392817020416, "step": 3538 }, { "epoch": 9.689253935660506, "grad_norm": 3.568753242492676, "learning_rate": 5.152054794520547e-07, "log_odds_chosen": 1.5908945798873901, "log_odds_ratio": -0.24086405336856842, "logits/chosen": 1.1307326555252075, "logits/rejected": 1.149993658065796, "logps/chosen": -2.4879627227783203, "logps/rejected": -3.9915051460266113, "loss": 0.5948, "nll_loss": 0.5707587003707886, "rewards/accuracies": 1.0, "rewards/chosen": -0.248796284198761, "rewards/margins": 0.15035419166088104, "rewards/rejected": -0.3991504907608032, "step": 3539 }, { "epoch": 9.69199178644764, "grad_norm": 4.254080295562744, "learning_rate": 5.150684931506849e-07, "log_odds_chosen": 1.6983907222747803, "log_odds_ratio": -0.3571401536464691, "logits/chosen": 1.03155517578125, "logits/rejected": 1.0805410146713257, "logps/chosen": -2.2222495079040527, "logps/rejected": -3.8307783603668213, "loss": 0.6058, "nll_loss": 0.5701268315315247, "rewards/accuracies": 0.875, "rewards/chosen": -0.22222495079040527, "rewards/margins": 0.16085287928581238, "rewards/rejected": -0.38307785987854004, "step": 3540 }, { "epoch": 9.69472963723477, "grad_norm": 3.018087148666382, "learning_rate": 5.14931506849315e-07, "log_odds_chosen": 2.9942548274993896, "log_odds_ratio": -0.25276753306388855, "logits/chosen": 0.8366031646728516, "logits/rejected": 0.8003853559494019, "logps/chosen": -2.123641014099121, "logps/rejected": -4.973207950592041, "loss": 0.5688, "nll_loss": 0.543567955493927, "rewards/accuracies": 1.0, "rewards/chosen": -0.2123641073703766, "rewards/margins": 0.284956693649292, "rewards/rejected": -0.4973208010196686, "step": 3541 }, { "epoch": 9.697467488021903, "grad_norm": 4.008447170257568, "learning_rate": 5.147945205479451e-07, "log_odds_chosen": 2.57747220993042, "log_odds_ratio": -0.2613956928253174, "logits/chosen": 1.0957616567611694, "logits/rejected": 1.1384286880493164, "logps/chosen": -2.8008227348327637, "logps/rejected": -5.303412914276123, "loss": 0.6738, "nll_loss": 0.6477007865905762, "rewards/accuracies": 1.0, "rewards/chosen": -0.2800822854042053, "rewards/margins": 0.25025901198387146, "rewards/rejected": -0.5303412675857544, "step": 3542 }, { "epoch": 9.700205338809035, "grad_norm": 5.214943885803223, "learning_rate": 5.146575342465753e-07, "log_odds_chosen": 1.8496050834655762, "log_odds_ratio": -0.24854210019111633, "logits/chosen": 0.8314078450202942, "logits/rejected": 0.8358397483825684, "logps/chosen": -3.032008647918701, "logps/rejected": -4.7650251388549805, "loss": 0.5938, "nll_loss": 0.5689147114753723, "rewards/accuracies": 1.0, "rewards/chosen": -0.3032008707523346, "rewards/margins": 0.17330163717269897, "rewards/rejected": -0.47650250792503357, "step": 3543 }, { "epoch": 9.702943189596168, "grad_norm": 3.42645263671875, "learning_rate": 5.145205479452054e-07, "log_odds_chosen": 2.8050765991210938, "log_odds_ratio": -0.21352452039718628, "logits/chosen": 0.8279484510421753, "logits/rejected": 0.8231393098831177, "logps/chosen": -2.291599750518799, "logps/rejected": -4.992873191833496, "loss": 0.7339, "nll_loss": 0.7125224471092224, "rewards/accuracies": 0.875, "rewards/chosen": -0.22915998101234436, "rewards/margins": 0.2701273560523987, "rewards/rejected": -0.49928736686706543, "step": 3544 }, { "epoch": 9.705681040383299, "grad_norm": 3.3288729190826416, "learning_rate": 5.143835616438355e-07, "log_odds_chosen": 2.6174192428588867, "log_odds_ratio": -0.29988154768943787, "logits/chosen": 1.0547776222229004, "logits/rejected": 1.0168232917785645, "logps/chosen": -1.7678511142730713, "logps/rejected": -4.211117267608643, "loss": 0.5589, "nll_loss": 0.5288936495780945, "rewards/accuracies": 0.875, "rewards/chosen": -0.17678511142730713, "rewards/margins": 0.244326651096344, "rewards/rejected": -0.4211117625236511, "step": 3545 }, { "epoch": 9.708418891170432, "grad_norm": 3.7652533054351807, "learning_rate": 5.142465753424657e-07, "log_odds_chosen": 2.4031569957733154, "log_odds_ratio": -0.16976302862167358, "logits/chosen": 0.9385507106781006, "logits/rejected": 0.8729783296585083, "logps/chosen": -1.7782093286514282, "logps/rejected": -3.9654369354248047, "loss": 0.6132, "nll_loss": 0.5961951017379761, "rewards/accuracies": 1.0, "rewards/chosen": -0.17782093584537506, "rewards/margins": 0.21872276067733765, "rewards/rejected": -0.3965437114238739, "step": 3546 }, { "epoch": 9.711156741957563, "grad_norm": 3.362189769744873, "learning_rate": 5.14109589041096e-07, "log_odds_chosen": 1.654628872871399, "log_odds_ratio": -0.22419989109039307, "logits/chosen": 0.9425839781761169, "logits/rejected": 0.970683217048645, "logps/chosen": -2.0842764377593994, "logps/rejected": -3.608825206756592, "loss": 0.6484, "nll_loss": 0.62600177526474, "rewards/accuracies": 1.0, "rewards/chosen": -0.20842766761779785, "rewards/margins": 0.15245485305786133, "rewards/rejected": -0.3608825206756592, "step": 3547 }, { "epoch": 9.713894592744696, "grad_norm": 8.219618797302246, "learning_rate": 5.13972602739726e-07, "log_odds_chosen": 2.089616537094116, "log_odds_ratio": -0.5806143283843994, "logits/chosen": 1.030867099761963, "logits/rejected": 0.9481858611106873, "logps/chosen": -2.138645887374878, "logps/rejected": -4.0900444984436035, "loss": 0.6259, "nll_loss": 0.5678030848503113, "rewards/accuracies": 0.75, "rewards/chosen": -0.21386459469795227, "rewards/margins": 0.19513987004756927, "rewards/rejected": -0.40900447964668274, "step": 3548 }, { "epoch": 9.716632443531827, "grad_norm": 3.7043070793151855, "learning_rate": 5.138356164383562e-07, "log_odds_chosen": 3.979808807373047, "log_odds_ratio": -0.13475792109966278, "logits/chosen": 1.0450193881988525, "logits/rejected": 1.081740379333496, "logps/chosen": -2.3100173473358154, "logps/rejected": -6.163511753082275, "loss": 0.7602, "nll_loss": 0.7466814517974854, "rewards/accuracies": 1.0, "rewards/chosen": -0.23100173473358154, "rewards/margins": 0.38534945249557495, "rewards/rejected": -0.6163512468338013, "step": 3549 }, { "epoch": 9.71937029431896, "grad_norm": 3.0329720973968506, "learning_rate": 5.136986301369864e-07, "log_odds_chosen": 3.7173011302948, "log_odds_ratio": -0.20382383465766907, "logits/chosen": 0.7412083745002747, "logits/rejected": 0.676703691482544, "logps/chosen": -1.8742581605911255, "logps/rejected": -5.400112152099609, "loss": 0.6794, "nll_loss": 0.6589688658714294, "rewards/accuracies": 1.0, "rewards/chosen": -0.18742582201957703, "rewards/margins": 0.3525853753089905, "rewards/rejected": -0.5400112271308899, "step": 3550 }, { "epoch": 9.722108145106091, "grad_norm": 3.690800666809082, "learning_rate": 5.135616438356164e-07, "log_odds_chosen": 2.1676297187805176, "log_odds_ratio": -0.4354495406150818, "logits/chosen": 0.7220432162284851, "logits/rejected": 0.7618340253829956, "logps/chosen": -2.325881004333496, "logps/rejected": -4.354437828063965, "loss": 0.7256, "nll_loss": 0.6820326447486877, "rewards/accuracies": 0.75, "rewards/chosen": -0.23258809745311737, "rewards/margins": 0.2028556764125824, "rewards/rejected": -0.4354437589645386, "step": 3551 }, { "epoch": 9.724845995893224, "grad_norm": 3.2345004081726074, "learning_rate": 5.134246575342466e-07, "log_odds_chosen": 2.571523904800415, "log_odds_ratio": -0.35339826345443726, "logits/chosen": 1.1304497718811035, "logits/rejected": 1.096346378326416, "logps/chosen": -1.8598700761795044, "logps/rejected": -4.282177448272705, "loss": 0.6474, "nll_loss": 0.6121089458465576, "rewards/accuracies": 0.75, "rewards/chosen": -0.18598699569702148, "rewards/margins": 0.24223074316978455, "rewards/rejected": -0.4282177686691284, "step": 3552 }, { "epoch": 9.727583846680355, "grad_norm": 3.4840176105499268, "learning_rate": 5.132876712328767e-07, "log_odds_chosen": 2.8089675903320312, "log_odds_ratio": -0.2581402063369751, "logits/chosen": 0.6913543939590454, "logits/rejected": 0.6081061363220215, "logps/chosen": -1.4600830078125, "logps/rejected": -4.061347007751465, "loss": 0.6161, "nll_loss": 0.5902644395828247, "rewards/accuracies": 1.0, "rewards/chosen": -0.14600829780101776, "rewards/margins": 0.26012641191482544, "rewards/rejected": -0.4061347246170044, "step": 3553 }, { "epoch": 9.730321697467488, "grad_norm": 3.4598605632781982, "learning_rate": 5.131506849315069e-07, "log_odds_chosen": 2.706207752227783, "log_odds_ratio": -0.24199432134628296, "logits/chosen": 1.0734938383102417, "logits/rejected": 0.9811606407165527, "logps/chosen": -1.530083179473877, "logps/rejected": -3.9393763542175293, "loss": 0.5668, "nll_loss": 0.5426371097564697, "rewards/accuracies": 1.0, "rewards/chosen": -0.1530083268880844, "rewards/margins": 0.24092929065227509, "rewards/rejected": -0.3939376175403595, "step": 3554 }, { "epoch": 9.73305954825462, "grad_norm": 3.213895559310913, "learning_rate": 5.13013698630137e-07, "log_odds_chosen": 2.4036900997161865, "log_odds_ratio": -0.1592225581407547, "logits/chosen": 1.0592210292816162, "logits/rejected": 1.0473097562789917, "logps/chosen": -1.8597543239593506, "logps/rejected": -4.103287220001221, "loss": 0.529, "nll_loss": 0.5130531191825867, "rewards/accuracies": 1.0, "rewards/chosen": -0.18597543239593506, "rewards/margins": 0.22435328364372253, "rewards/rejected": -0.4103286862373352, "step": 3555 }, { "epoch": 9.735797399041752, "grad_norm": 3.38716721534729, "learning_rate": 5.128767123287671e-07, "log_odds_chosen": 4.3148651123046875, "log_odds_ratio": -0.22618167102336884, "logits/chosen": 0.9260232448577881, "logits/rejected": 0.9579541683197021, "logps/chosen": -1.8696850538253784, "logps/rejected": -5.9779276847839355, "loss": 0.6017, "nll_loss": 0.5791001319885254, "rewards/accuracies": 0.875, "rewards/chosen": -0.18696850538253784, "rewards/margins": 0.4108242988586426, "rewards/rejected": -0.5977928042411804, "step": 3556 }, { "epoch": 9.738535249828884, "grad_norm": 3.2530925273895264, "learning_rate": 5.127397260273973e-07, "log_odds_chosen": 2.246952772140503, "log_odds_ratio": -0.21310989558696747, "logits/chosen": 0.85532546043396, "logits/rejected": 0.8697963356971741, "logps/chosen": -1.48750638961792, "logps/rejected": -3.527822971343994, "loss": 0.544, "nll_loss": 0.5226771831512451, "rewards/accuracies": 1.0, "rewards/chosen": -0.14875063300132751, "rewards/margins": 0.20403164625167847, "rewards/rejected": -0.35278230905532837, "step": 3557 }, { "epoch": 9.741273100616016, "grad_norm": 3.2082412242889404, "learning_rate": 5.126027397260274e-07, "log_odds_chosen": 2.555851936340332, "log_odds_ratio": -0.1973654329776764, "logits/chosen": 1.0360770225524902, "logits/rejected": 0.9507956504821777, "logps/chosen": -1.9421939849853516, "logps/rejected": -4.255777359008789, "loss": 0.6822, "nll_loss": 0.6624897718429565, "rewards/accuracies": 1.0, "rewards/chosen": -0.1942194104194641, "rewards/margins": 0.2313583642244339, "rewards/rejected": -0.4255777895450592, "step": 3558 }, { "epoch": 9.744010951403148, "grad_norm": 4.104471206665039, "learning_rate": 5.124657534246575e-07, "log_odds_chosen": 1.9066534042358398, "log_odds_ratio": -0.2889416217803955, "logits/chosen": 0.61380934715271, "logits/rejected": 0.6117408275604248, "logps/chosen": -2.3963871002197266, "logps/rejected": -4.16928768157959, "loss": 0.65, "nll_loss": 0.6211203336715698, "rewards/accuracies": 1.0, "rewards/chosen": -0.23963874578475952, "rewards/margins": 0.17729006707668304, "rewards/rejected": -0.41692879796028137, "step": 3559 }, { "epoch": 9.74674880219028, "grad_norm": 6.846187591552734, "learning_rate": 5.123287671232877e-07, "log_odds_chosen": 3.3964896202087402, "log_odds_ratio": -0.37345725297927856, "logits/chosen": 1.2258061170578003, "logits/rejected": 1.2670284509658813, "logps/chosen": -3.3947410583496094, "logps/rejected": -6.767622947692871, "loss": 0.6578, "nll_loss": 0.6204646229743958, "rewards/accuracies": 0.875, "rewards/chosen": -0.3394741117954254, "rewards/margins": 0.3372882008552551, "rewards/rejected": -0.6767623424530029, "step": 3560 }, { "epoch": 9.749486652977414, "grad_norm": 3.900763988494873, "learning_rate": 5.121917808219179e-07, "log_odds_chosen": 1.5806617736816406, "log_odds_ratio": -0.2826438546180725, "logits/chosen": 1.0368911027908325, "logits/rejected": 0.9270832538604736, "logps/chosen": -1.160469651222229, "logps/rejected": -2.4972457885742188, "loss": 0.522, "nll_loss": 0.493763267993927, "rewards/accuracies": 1.0, "rewards/chosen": -0.1160469651222229, "rewards/margins": 0.1336776167154312, "rewards/rejected": -0.2497245818376541, "step": 3561 }, { "epoch": 9.752224503764545, "grad_norm": 4.865588188171387, "learning_rate": 5.120547945205479e-07, "log_odds_chosen": 1.4412519931793213, "log_odds_ratio": -0.5474170446395874, "logits/chosen": 0.7964732646942139, "logits/rejected": 0.7303904294967651, "logps/chosen": -1.939164400100708, "logps/rejected": -3.105661153793335, "loss": 0.6266, "nll_loss": 0.5718607306480408, "rewards/accuracies": 0.75, "rewards/chosen": -0.193916454911232, "rewards/margins": 0.11664967238903046, "rewards/rejected": -0.31056612730026245, "step": 3562 }, { "epoch": 9.754962354551678, "grad_norm": 4.300881862640381, "learning_rate": 5.119178082191781e-07, "log_odds_chosen": 1.0771121978759766, "log_odds_ratio": -0.4209784269332886, "logits/chosen": 1.0507311820983887, "logits/rejected": 1.024202823638916, "logps/chosen": -2.1101527214050293, "logps/rejected": -3.0557124614715576, "loss": 0.5406, "nll_loss": 0.49849116802215576, "rewards/accuracies": 0.875, "rewards/chosen": -0.21101529896259308, "rewards/margins": 0.09455597400665283, "rewards/rejected": -0.3055712580680847, "step": 3563 }, { "epoch": 9.757700205338809, "grad_norm": 3.030071973800659, "learning_rate": 5.117808219178083e-07, "log_odds_chosen": 3.247140407562256, "log_odds_ratio": -0.1310473382472992, "logits/chosen": 0.9252711534500122, "logits/rejected": 0.9575248956680298, "logps/chosen": -1.7486214637756348, "logps/rejected": -4.799018383026123, "loss": 0.5791, "nll_loss": 0.5660384297370911, "rewards/accuracies": 1.0, "rewards/chosen": -0.17486216127872467, "rewards/margins": 0.3050397038459778, "rewards/rejected": -0.47990188002586365, "step": 3564 }, { "epoch": 9.760438056125942, "grad_norm": 4.659661769866943, "learning_rate": 5.116438356164383e-07, "log_odds_chosen": 1.1926822662353516, "log_odds_ratio": -0.5522919297218323, "logits/chosen": 0.9204076528549194, "logits/rejected": 0.9111549258232117, "logps/chosen": -1.7376222610473633, "logps/rejected": -2.8354368209838867, "loss": 0.6465, "nll_loss": 0.5912973880767822, "rewards/accuracies": 0.625, "rewards/chosen": -0.1737622320652008, "rewards/margins": 0.10978145152330399, "rewards/rejected": -0.2835436761379242, "step": 3565 }, { "epoch": 9.763175906913073, "grad_norm": 3.8030059337615967, "learning_rate": 5.115068493150685e-07, "log_odds_chosen": 3.01861834526062, "log_odds_ratio": -0.16651538014411926, "logits/chosen": 0.8820457458496094, "logits/rejected": 0.8458729982376099, "logps/chosen": -2.4929308891296387, "logps/rejected": -5.387247562408447, "loss": 0.5699, "nll_loss": 0.5532809495925903, "rewards/accuracies": 1.0, "rewards/chosen": -0.24929308891296387, "rewards/margins": 0.289431631565094, "rewards/rejected": -0.5387247204780579, "step": 3566 }, { "epoch": 9.765913757700206, "grad_norm": 3.489204168319702, "learning_rate": 5.113698630136986e-07, "log_odds_chosen": 3.1198225021362305, "log_odds_ratio": -0.19525489211082458, "logits/chosen": 0.7802219390869141, "logits/rejected": 0.7530068755149841, "logps/chosen": -2.418132781982422, "logps/rejected": -5.423700332641602, "loss": 0.7454, "nll_loss": 0.7258870005607605, "rewards/accuracies": 1.0, "rewards/chosen": -0.2418133020401001, "rewards/margins": 0.3005567193031311, "rewards/rejected": -0.5423700213432312, "step": 3567 }, { "epoch": 9.768651608487337, "grad_norm": 3.1514768600463867, "learning_rate": 5.112328767123288e-07, "log_odds_chosen": 2.2300939559936523, "log_odds_ratio": -0.2103789895772934, "logits/chosen": 1.0079861879348755, "logits/rejected": 0.8809213638305664, "logps/chosen": -1.90065598487854, "logps/rejected": -3.9355180263519287, "loss": 0.6697, "nll_loss": 0.6486150622367859, "rewards/accuracies": 1.0, "rewards/chosen": -0.19006559252738953, "rewards/margins": 0.20348623394966125, "rewards/rejected": -0.3935518264770508, "step": 3568 }, { "epoch": 9.77138945927447, "grad_norm": 3.5427021980285645, "learning_rate": 5.110958904109589e-07, "log_odds_chosen": 3.2690422534942627, "log_odds_ratio": -0.10086803138256073, "logits/chosen": 0.9170087575912476, "logits/rejected": 0.9302108883857727, "logps/chosen": -2.5808167457580566, "logps/rejected": -5.642482280731201, "loss": 0.6689, "nll_loss": 0.6588561534881592, "rewards/accuracies": 1.0, "rewards/chosen": -0.25808167457580566, "rewards/margins": 0.3061665892601013, "rewards/rejected": -0.5642482042312622, "step": 3569 }, { "epoch": 9.774127310061601, "grad_norm": 3.171231746673584, "learning_rate": 5.10958904109589e-07, "log_odds_chosen": 3.0102028846740723, "log_odds_ratio": -0.16828978061676025, "logits/chosen": 0.8860718011856079, "logits/rejected": 0.9184005856513977, "logps/chosen": -1.2897247076034546, "logps/rejected": -3.95121169090271, "loss": 0.5484, "nll_loss": 0.5316095948219299, "rewards/accuracies": 1.0, "rewards/chosen": -0.12897247076034546, "rewards/margins": 0.2661486864089966, "rewards/rejected": -0.39512115716934204, "step": 3570 }, { "epoch": 9.776865160848734, "grad_norm": 3.7280685901641846, "learning_rate": 5.108219178082192e-07, "log_odds_chosen": 2.1714820861816406, "log_odds_ratio": -0.2135070413351059, "logits/chosen": 0.8219479322433472, "logits/rejected": 0.8464503884315491, "logps/chosen": -1.8621923923492432, "logps/rejected": -3.8779640197753906, "loss": 0.6986, "nll_loss": 0.6772103905677795, "rewards/accuracies": 1.0, "rewards/chosen": -0.1862192451953888, "rewards/margins": 0.20157718658447266, "rewards/rejected": -0.38779640197753906, "step": 3571 }, { "epoch": 9.779603011635865, "grad_norm": 3.993306875228882, "learning_rate": 5.106849315068493e-07, "log_odds_chosen": 0.5830603837966919, "log_odds_ratio": -0.5798067450523376, "logits/chosen": 0.8588744401931763, "logits/rejected": 0.8265054821968079, "logps/chosen": -1.8040556907653809, "logps/rejected": -2.307048797607422, "loss": 0.6228, "nll_loss": 0.5648106336593628, "rewards/accuracies": 0.75, "rewards/chosen": -0.18040555715560913, "rewards/margins": 0.050299324095249176, "rewards/rejected": -0.2307048887014389, "step": 3572 }, { "epoch": 9.782340862422998, "grad_norm": 6.020162582397461, "learning_rate": 5.105479452054794e-07, "log_odds_chosen": 1.068558692932129, "log_odds_ratio": -0.5859119892120361, "logits/chosen": 0.8238902688026428, "logits/rejected": 0.8638138175010681, "logps/chosen": -2.8463499546051025, "logps/rejected": -3.828860282897949, "loss": 0.7319, "nll_loss": 0.673296332359314, "rewards/accuracies": 0.625, "rewards/chosen": -0.2846350073814392, "rewards/margins": 0.09825103729963303, "rewards/rejected": -0.38288605213165283, "step": 3573 }, { "epoch": 9.78507871321013, "grad_norm": 3.313755989074707, "learning_rate": 5.104109589041096e-07, "log_odds_chosen": 1.2435696125030518, "log_odds_ratio": -0.3615046739578247, "logits/chosen": 1.1060879230499268, "logits/rejected": 1.0876353979110718, "logps/chosen": -1.4965617656707764, "logps/rejected": -2.6180903911590576, "loss": 0.6003, "nll_loss": 0.5641111135482788, "rewards/accuracies": 0.875, "rewards/chosen": -0.14965617656707764, "rewards/margins": 0.11215286701917648, "rewards/rejected": -0.2618090510368347, "step": 3574 }, { "epoch": 9.787816563997263, "grad_norm": 3.1544747352600098, "learning_rate": 5.102739726027398e-07, "log_odds_chosen": 1.2860875129699707, "log_odds_ratio": -0.40393975377082825, "logits/chosen": 0.7216450572013855, "logits/rejected": 0.6555283069610596, "logps/chosen": -1.7318799495697021, "logps/rejected": -2.8437063694000244, "loss": 0.6203, "nll_loss": 0.5798734426498413, "rewards/accuracies": 0.75, "rewards/chosen": -0.1731880009174347, "rewards/margins": 0.11118263006210327, "rewards/rejected": -0.28437063097953796, "step": 3575 }, { "epoch": 9.790554414784394, "grad_norm": 2.921809673309326, "learning_rate": 5.101369863013698e-07, "log_odds_chosen": 1.983199954032898, "log_odds_ratio": -0.27509355545043945, "logits/chosen": 1.0749106407165527, "logits/rejected": 1.0533664226531982, "logps/chosen": -1.8222912549972534, "logps/rejected": -3.683511257171631, "loss": 0.5873, "nll_loss": 0.5597413182258606, "rewards/accuracies": 1.0, "rewards/chosen": -0.18222913146018982, "rewards/margins": 0.18612203001976013, "rewards/rejected": -0.36835116147994995, "step": 3576 }, { "epoch": 9.793292265571527, "grad_norm": 3.247661590576172, "learning_rate": 5.1e-07, "log_odds_chosen": 1.8408586978912354, "log_odds_ratio": -0.2061573714017868, "logits/chosen": 1.0000505447387695, "logits/rejected": 0.9429481029510498, "logps/chosen": -1.419029951095581, "logps/rejected": -3.05039644241333, "loss": 0.5903, "nll_loss": 0.5696555972099304, "rewards/accuracies": 1.0, "rewards/chosen": -0.14190298318862915, "rewards/margins": 0.16313666105270386, "rewards/rejected": -0.305039644241333, "step": 3577 }, { "epoch": 9.796030116358658, "grad_norm": 3.0000908374786377, "learning_rate": 5.098630136986302e-07, "log_odds_chosen": 3.2088027000427246, "log_odds_ratio": -0.1497243046760559, "logits/chosen": 0.6352382302284241, "logits/rejected": 0.6466860175132751, "logps/chosen": -1.6260552406311035, "logps/rejected": -4.601839065551758, "loss": 0.5815, "nll_loss": 0.566482424736023, "rewards/accuracies": 1.0, "rewards/chosen": -0.16260552406311035, "rewards/margins": 0.297578364610672, "rewards/rejected": -0.46018388867378235, "step": 3578 }, { "epoch": 9.79876796714579, "grad_norm": 3.265092611312866, "learning_rate": 5.097260273972602e-07, "log_odds_chosen": 2.8124752044677734, "log_odds_ratio": -0.1660042107105255, "logits/chosen": 1.096240758895874, "logits/rejected": 1.095274567604065, "logps/chosen": -2.043936252593994, "logps/rejected": -4.646403789520264, "loss": 0.5599, "nll_loss": 0.5433027744293213, "rewards/accuracies": 1.0, "rewards/chosen": -0.2043936401605606, "rewards/margins": 0.26024675369262695, "rewards/rejected": -0.46464037895202637, "step": 3579 }, { "epoch": 9.801505817932922, "grad_norm": 3.2737197875976562, "learning_rate": 5.095890410958904e-07, "log_odds_chosen": 3.7345657348632812, "log_odds_ratio": -0.14605969190597534, "logits/chosen": 0.8439156413078308, "logits/rejected": 0.8604521751403809, "logps/chosen": -2.0990800857543945, "logps/rejected": -5.63137674331665, "loss": 0.6503, "nll_loss": 0.6357235908508301, "rewards/accuracies": 1.0, "rewards/chosen": -0.20990803837776184, "rewards/margins": 0.3532296419143677, "rewards/rejected": -0.5631376504898071, "step": 3580 }, { "epoch": 9.804243668720055, "grad_norm": 3.1587724685668945, "learning_rate": 5.094520547945205e-07, "log_odds_chosen": 3.593291997909546, "log_odds_ratio": -0.11995142698287964, "logits/chosen": 0.9408391118049622, "logits/rejected": 0.96257084608078, "logps/chosen": -2.1145339012145996, "logps/rejected": -5.502503871917725, "loss": 0.6753, "nll_loss": 0.663298487663269, "rewards/accuracies": 1.0, "rewards/chosen": -0.2114533931016922, "rewards/margins": 0.3387969732284546, "rewards/rejected": -0.5502503514289856, "step": 3581 }, { "epoch": 9.806981519507186, "grad_norm": 3.713785409927368, "learning_rate": 5.093150684931507e-07, "log_odds_chosen": 2.0241377353668213, "log_odds_ratio": -0.2820071876049042, "logits/chosen": 0.9947397112846375, "logits/rejected": 0.9848020672798157, "logps/chosen": -2.3288421630859375, "logps/rejected": -4.223671913146973, "loss": 0.6037, "nll_loss": 0.5755016803741455, "rewards/accuracies": 0.75, "rewards/chosen": -0.2328842282295227, "rewards/margins": 0.18948295712471008, "rewards/rejected": -0.4223671853542328, "step": 3582 }, { "epoch": 9.809719370294319, "grad_norm": 4.069874286651611, "learning_rate": 5.091780821917808e-07, "log_odds_chosen": 2.2155404090881348, "log_odds_ratio": -0.2642369866371155, "logits/chosen": 1.0393551588058472, "logits/rejected": 0.9690220355987549, "logps/chosen": -1.8431733846664429, "logps/rejected": -3.9164834022521973, "loss": 0.6666, "nll_loss": 0.6401714086532593, "rewards/accuracies": 1.0, "rewards/chosen": -0.18431732058525085, "rewards/margins": 0.20733101665973663, "rewards/rejected": -0.3916483521461487, "step": 3583 }, { "epoch": 9.81245722108145, "grad_norm": 3.7996764183044434, "learning_rate": 5.090410958904109e-07, "log_odds_chosen": 3.735691547393799, "log_odds_ratio": -0.18361462652683258, "logits/chosen": 0.8889358639717102, "logits/rejected": 0.6572836637496948, "logps/chosen": -1.4802577495574951, "logps/rejected": -4.964630126953125, "loss": 0.6657, "nll_loss": 0.6473528742790222, "rewards/accuracies": 0.875, "rewards/chosen": -0.1480257660150528, "rewards/margins": 0.34843724966049194, "rewards/rejected": -0.49646300077438354, "step": 3584 }, { "epoch": 9.815195071868583, "grad_norm": 3.57336688041687, "learning_rate": 5.089041095890411e-07, "log_odds_chosen": 2.5296518802642822, "log_odds_ratio": -0.14630009233951569, "logits/chosen": 1.0405372381210327, "logits/rejected": 1.0964670181274414, "logps/chosen": -1.5395443439483643, "logps/rejected": -3.8033976554870605, "loss": 0.524, "nll_loss": 0.5093468427658081, "rewards/accuracies": 1.0, "rewards/chosen": -0.15395444631576538, "rewards/margins": 0.22638531029224396, "rewards/rejected": -0.38033974170684814, "step": 3585 }, { "epoch": 9.817932922655714, "grad_norm": 4.204797744750977, "learning_rate": 5.087671232876712e-07, "log_odds_chosen": 1.710286259651184, "log_odds_ratio": -0.29610785841941833, "logits/chosen": 1.0690727233886719, "logits/rejected": 1.0502961874008179, "logps/chosen": -2.2459537982940674, "logps/rejected": -3.8181416988372803, "loss": 0.6437, "nll_loss": 0.6140480637550354, "rewards/accuracies": 0.875, "rewards/chosen": -0.22459539771080017, "rewards/margins": 0.1572187840938568, "rewards/rejected": -0.38181421160697937, "step": 3586 }, { "epoch": 9.820670773442847, "grad_norm": 3.1642770767211914, "learning_rate": 5.086301369863013e-07, "log_odds_chosen": 2.021989345550537, "log_odds_ratio": -0.2506874203681946, "logits/chosen": 0.8449186086654663, "logits/rejected": 0.8389719724655151, "logps/chosen": -1.8418521881103516, "logps/rejected": -3.7489519119262695, "loss": 0.5693, "nll_loss": 0.5442100763320923, "rewards/accuracies": 0.875, "rewards/chosen": -0.1841852068901062, "rewards/margins": 0.19071000814437866, "rewards/rejected": -0.37489521503448486, "step": 3587 }, { "epoch": 9.82340862422998, "grad_norm": 3.6740164756774902, "learning_rate": 5.084931506849315e-07, "log_odds_chosen": 2.4980075359344482, "log_odds_ratio": -0.5110772848129272, "logits/chosen": 0.7680394053459167, "logits/rejected": 0.7603088021278381, "logps/chosen": -2.3143436908721924, "logps/rejected": -4.70976448059082, "loss": 0.6227, "nll_loss": 0.5715991854667664, "rewards/accuracies": 0.875, "rewards/chosen": -0.2314343899488449, "rewards/margins": 0.23954205214977264, "rewards/rejected": -0.47097644209861755, "step": 3588 }, { "epoch": 9.826146475017111, "grad_norm": 3.5907058715820312, "learning_rate": 5.083561643835617e-07, "log_odds_chosen": 1.8833688497543335, "log_odds_ratio": -0.2867368459701538, "logits/chosen": 1.0243453979492188, "logits/rejected": 1.0162556171417236, "logps/chosen": -1.6519540548324585, "logps/rejected": -3.3883800506591797, "loss": 0.4959, "nll_loss": 0.46725472807884216, "rewards/accuracies": 0.875, "rewards/chosen": -0.16519540548324585, "rewards/margins": 0.1736425757408142, "rewards/rejected": -0.33883798122406006, "step": 3589 }, { "epoch": 9.828884325804244, "grad_norm": 4.450386047363281, "learning_rate": 5.082191780821917e-07, "log_odds_chosen": 1.2750518321990967, "log_odds_ratio": -0.4533015787601471, "logits/chosen": 0.8767822980880737, "logits/rejected": 0.8670761585235596, "logps/chosen": -2.590916395187378, "logps/rejected": -3.7680678367614746, "loss": 0.6525, "nll_loss": 0.6071839332580566, "rewards/accuracies": 0.75, "rewards/chosen": -0.25909164547920227, "rewards/margins": 0.11771515011787415, "rewards/rejected": -0.3768067955970764, "step": 3590 }, { "epoch": 9.831622176591376, "grad_norm": 4.481198787689209, "learning_rate": 5.080821917808219e-07, "log_odds_chosen": 2.0479025840759277, "log_odds_ratio": -0.258211225271225, "logits/chosen": 1.0673551559448242, "logits/rejected": 1.021384835243225, "logps/chosen": -2.018155097961426, "logps/rejected": -3.861280679702759, "loss": 0.6361, "nll_loss": 0.6102916598320007, "rewards/accuracies": 0.875, "rewards/chosen": -0.20181551575660706, "rewards/margins": 0.1843125820159912, "rewards/rejected": -0.3861280679702759, "step": 3591 }, { "epoch": 9.834360027378509, "grad_norm": 5.965595722198486, "learning_rate": 5.079452054794521e-07, "log_odds_chosen": 3.093238353729248, "log_odds_ratio": -0.20943935215473175, "logits/chosen": 1.1227487325668335, "logits/rejected": 1.1205248832702637, "logps/chosen": -1.9655524492263794, "logps/rejected": -4.917510986328125, "loss": 0.5616, "nll_loss": 0.5406510829925537, "rewards/accuracies": 1.0, "rewards/chosen": -0.1965552270412445, "rewards/margins": 0.29519590735435486, "rewards/rejected": -0.49175116419792175, "step": 3592 }, { "epoch": 9.83709787816564, "grad_norm": 6.880102634429932, "learning_rate": 5.078082191780821e-07, "log_odds_chosen": 0.6731734275817871, "log_odds_ratio": -0.5380198359489441, "logits/chosen": 0.980146586894989, "logits/rejected": 0.9768778681755066, "logps/chosen": -2.7464942932128906, "logps/rejected": -3.3066141605377197, "loss": 0.6183, "nll_loss": 0.564478874206543, "rewards/accuracies": 0.875, "rewards/chosen": -0.274649441242218, "rewards/margins": 0.05601197108626366, "rewards/rejected": -0.330661416053772, "step": 3593 }, { "epoch": 9.839835728952773, "grad_norm": 3.684937000274658, "learning_rate": 5.076712328767123e-07, "log_odds_chosen": 1.8348857164382935, "log_odds_ratio": -0.18091478943824768, "logits/chosen": 1.2154299020767212, "logits/rejected": 1.1939853429794312, "logps/chosen": -1.9059442281723022, "logps/rejected": -3.567683696746826, "loss": 0.6024, "nll_loss": 0.5843202471733093, "rewards/accuracies": 1.0, "rewards/chosen": -0.19059443473815918, "rewards/margins": 0.16617393493652344, "rewards/rejected": -0.3567683696746826, "step": 3594 }, { "epoch": 9.842573579739904, "grad_norm": 3.2136425971984863, "learning_rate": 5.075342465753425e-07, "log_odds_chosen": 3.066279172897339, "log_odds_ratio": -0.18585209548473358, "logits/chosen": 1.0714421272277832, "logits/rejected": 1.0926101207733154, "logps/chosen": -1.4521785974502563, "logps/rejected": -4.170401573181152, "loss": 0.5236, "nll_loss": 0.505001962184906, "rewards/accuracies": 1.0, "rewards/chosen": -0.1452178657054901, "rewards/margins": 0.2718222737312317, "rewards/rejected": -0.4170401692390442, "step": 3595 }, { "epoch": 9.845311430527037, "grad_norm": 4.023436069488525, "learning_rate": 5.073972602739726e-07, "log_odds_chosen": 0.9636661410331726, "log_odds_ratio": -0.4330713748931885, "logits/chosen": 1.0343385934829712, "logits/rejected": 1.0213603973388672, "logps/chosen": -2.3626716136932373, "logps/rejected": -3.2739691734313965, "loss": 0.6264, "nll_loss": 0.5830750465393066, "rewards/accuracies": 0.875, "rewards/chosen": -0.23626714944839478, "rewards/margins": 0.09112979471683502, "rewards/rejected": -0.327396959066391, "step": 3596 }, { "epoch": 9.848049281314168, "grad_norm": 3.2802107334136963, "learning_rate": 5.072602739726027e-07, "log_odds_chosen": 2.1469621658325195, "log_odds_ratio": -0.2588753402233124, "logits/chosen": 0.9310649633407593, "logits/rejected": 0.9632359743118286, "logps/chosen": -1.961874008178711, "logps/rejected": -3.9689183235168457, "loss": 0.5843, "nll_loss": 0.5583755970001221, "rewards/accuracies": 1.0, "rewards/chosen": -0.19618739187717438, "rewards/margins": 0.200704425573349, "rewards/rejected": -0.39689183235168457, "step": 3597 }, { "epoch": 9.850787132101301, "grad_norm": 3.0128512382507324, "learning_rate": 5.071232876712328e-07, "log_odds_chosen": 2.5943124294281006, "log_odds_ratio": -0.25161463022232056, "logits/chosen": 0.9271590709686279, "logits/rejected": 0.9317728281021118, "logps/chosen": -1.4262607097625732, "logps/rejected": -3.8248953819274902, "loss": 0.5503, "nll_loss": 0.525178849697113, "rewards/accuracies": 0.875, "rewards/chosen": -0.1426260769367218, "rewards/margins": 0.23986348509788513, "rewards/rejected": -0.38248956203460693, "step": 3598 }, { "epoch": 9.853524982888432, "grad_norm": 3.812479019165039, "learning_rate": 5.06986301369863e-07, "log_odds_chosen": 2.351714611053467, "log_odds_ratio": -0.16020169854164124, "logits/chosen": 0.830477237701416, "logits/rejected": 0.829687774181366, "logps/chosen": -1.7972824573516846, "logps/rejected": -3.941650390625, "loss": 0.6001, "nll_loss": 0.5840446352958679, "rewards/accuracies": 1.0, "rewards/chosen": -0.17972823977470398, "rewards/margins": 0.21443679928779602, "rewards/rejected": -0.3941650688648224, "step": 3599 }, { "epoch": 9.856262833675565, "grad_norm": 5.758276462554932, "learning_rate": 5.068493150684931e-07, "log_odds_chosen": 2.9084088802337646, "log_odds_ratio": -0.16283580660820007, "logits/chosen": 0.8955255746841431, "logits/rejected": 0.8102712035179138, "logps/chosen": -2.4216785430908203, "logps/rejected": -5.202767372131348, "loss": 0.6957, "nll_loss": 0.6794301271438599, "rewards/accuracies": 1.0, "rewards/chosen": -0.2421678602695465, "rewards/margins": 0.2781089246273041, "rewards/rejected": -0.5202767848968506, "step": 3600 }, { "epoch": 9.859000684462696, "grad_norm": 3.3561692237854004, "learning_rate": 5.067123287671232e-07, "log_odds_chosen": 3.5105347633361816, "log_odds_ratio": -0.1160539910197258, "logits/chosen": 1.3418498039245605, "logits/rejected": 1.3647738695144653, "logps/chosen": -1.9191651344299316, "logps/rejected": -5.246775150299072, "loss": 0.5949, "nll_loss": 0.5833178758621216, "rewards/accuracies": 1.0, "rewards/chosen": -0.19191652536392212, "rewards/margins": 0.3327610194683075, "rewards/rejected": -0.5246775150299072, "step": 3601 }, { "epoch": 9.86173853524983, "grad_norm": 3.327634334564209, "learning_rate": 5.065753424657534e-07, "log_odds_chosen": 2.5543220043182373, "log_odds_ratio": -0.1520061194896698, "logits/chosen": 1.07218599319458, "logits/rejected": 1.022214412689209, "logps/chosen": -1.4471166133880615, "logps/rejected": -3.742276430130005, "loss": 0.5444, "nll_loss": 0.5292462706565857, "rewards/accuracies": 1.0, "rewards/chosen": -0.1447116732597351, "rewards/margins": 0.22951596975326538, "rewards/rejected": -0.3742276728153229, "step": 3602 }, { "epoch": 9.86447638603696, "grad_norm": 3.349761724472046, "learning_rate": 5.064383561643836e-07, "log_odds_chosen": 3.5190088748931885, "log_odds_ratio": -0.132480189204216, "logits/chosen": 0.6206278800964355, "logits/rejected": 0.5841671824455261, "logps/chosen": -1.9389870166778564, "logps/rejected": -5.255860328674316, "loss": 0.5652, "nll_loss": 0.5520012378692627, "rewards/accuracies": 1.0, "rewards/chosen": -0.19389870762825012, "rewards/margins": 0.3316873610019684, "rewards/rejected": -0.5255860686302185, "step": 3603 }, { "epoch": 9.867214236824093, "grad_norm": 5.897262096405029, "learning_rate": 5.063013698630136e-07, "log_odds_chosen": 1.8852224349975586, "log_odds_ratio": -0.4924212098121643, "logits/chosen": 0.9184368252754211, "logits/rejected": 0.9174243211746216, "logps/chosen": -1.8760260343551636, "logps/rejected": -3.5687294006347656, "loss": 0.6022, "nll_loss": 0.5530072450637817, "rewards/accuracies": 0.75, "rewards/chosen": -0.18760260939598083, "rewards/margins": 0.169270321726799, "rewards/rejected": -0.35687291622161865, "step": 3604 }, { "epoch": 9.869952087611225, "grad_norm": 5.936758995056152, "learning_rate": 5.061643835616438e-07, "log_odds_chosen": 1.016270637512207, "log_odds_ratio": -0.5874884128570557, "logits/chosen": 0.7888734340667725, "logits/rejected": 0.775246262550354, "logps/chosen": -2.090083599090576, "logps/rejected": -2.9641332626342773, "loss": 0.6038, "nll_loss": 0.5450977683067322, "rewards/accuracies": 0.875, "rewards/chosen": -0.2090083658695221, "rewards/margins": 0.08740495145320892, "rewards/rejected": -0.2964133024215698, "step": 3605 }, { "epoch": 9.872689938398358, "grad_norm": 7.512608051300049, "learning_rate": 5.06027397260274e-07, "log_odds_chosen": 1.5326753854751587, "log_odds_ratio": -0.3247383236885071, "logits/chosen": 1.0214189291000366, "logits/rejected": 1.0052952766418457, "logps/chosen": -2.0817434787750244, "logps/rejected": -3.448113203048706, "loss": 0.77, "nll_loss": 0.7375068664550781, "rewards/accuracies": 1.0, "rewards/chosen": -0.20817433297634125, "rewards/margins": 0.13663700222969055, "rewards/rejected": -0.3448113203048706, "step": 3606 }, { "epoch": 9.875427789185489, "grad_norm": 4.762118816375732, "learning_rate": 5.05890410958904e-07, "log_odds_chosen": 1.9654878377914429, "log_odds_ratio": -0.2923112213611603, "logits/chosen": 0.8459131717681885, "logits/rejected": 0.8308942914009094, "logps/chosen": -2.143231153488159, "logps/rejected": -3.8500475883483887, "loss": 0.5786, "nll_loss": 0.5493956804275513, "rewards/accuracies": 0.875, "rewards/chosen": -0.21432313323020935, "rewards/margins": 0.1706816405057907, "rewards/rejected": -0.38500475883483887, "step": 3607 }, { "epoch": 9.878165639972622, "grad_norm": 3.6823267936706543, "learning_rate": 5.057534246575342e-07, "log_odds_chosen": 2.6100573539733887, "log_odds_ratio": -0.2328527569770813, "logits/chosen": 1.0841611623764038, "logits/rejected": 1.1230523586273193, "logps/chosen": -2.1502292156219482, "logps/rejected": -4.593350410461426, "loss": 0.6128, "nll_loss": 0.5895450115203857, "rewards/accuracies": 0.875, "rewards/chosen": -0.21502292156219482, "rewards/margins": 0.24431216716766357, "rewards/rejected": -0.459335058927536, "step": 3608 }, { "epoch": 9.880903490759753, "grad_norm": 6.811489582061768, "learning_rate": 5.056164383561644e-07, "log_odds_chosen": 2.989220380783081, "log_odds_ratio": -0.33711978793144226, "logits/chosen": 0.7844489812850952, "logits/rejected": 0.7577950358390808, "logps/chosen": -2.4364707469940186, "logps/rejected": -5.2470784187316895, "loss": 0.7194, "nll_loss": 0.6857150793075562, "rewards/accuracies": 0.875, "rewards/chosen": -0.24364709854125977, "rewards/margins": 0.28106075525283813, "rewards/rejected": -0.5247078537940979, "step": 3609 }, { "epoch": 9.883641341546886, "grad_norm": 3.57369065284729, "learning_rate": 5.054794520547944e-07, "log_odds_chosen": 1.5870519876480103, "log_odds_ratio": -0.2509320080280304, "logits/chosen": 1.1094553470611572, "logits/rejected": 1.1063740253448486, "logps/chosen": -1.4127910137176514, "logps/rejected": -2.793461322784424, "loss": 0.5369, "nll_loss": 0.5118557214736938, "rewards/accuracies": 1.0, "rewards/chosen": -0.14127910137176514, "rewards/margins": 0.13806705176830292, "rewards/rejected": -0.27934616804122925, "step": 3610 }, { "epoch": 9.886379192334019, "grad_norm": 3.1486198902130127, "learning_rate": 5.053424657534246e-07, "log_odds_chosen": 2.5274972915649414, "log_odds_ratio": -0.22578370571136475, "logits/chosen": 0.937113344669342, "logits/rejected": 0.8864662051200867, "logps/chosen": -1.5601682662963867, "logps/rejected": -3.852398633956909, "loss": 0.5671, "nll_loss": 0.5444849729537964, "rewards/accuracies": 1.0, "rewards/chosen": -0.15601684153079987, "rewards/margins": 0.22922304272651672, "rewards/rejected": -0.3852398693561554, "step": 3611 }, { "epoch": 9.88911704312115, "grad_norm": 8.348567008972168, "learning_rate": 5.052054794520547e-07, "log_odds_chosen": 0.8355380892753601, "log_odds_ratio": -0.7000473141670227, "logits/chosen": 0.8819833993911743, "logits/rejected": 0.7824928164482117, "logps/chosen": -2.0038092136383057, "logps/rejected": -2.648016929626465, "loss": 0.5833, "nll_loss": 0.5133416056632996, "rewards/accuracies": 0.875, "rewards/chosen": -0.20038092136383057, "rewards/margins": 0.06442075967788696, "rewards/rejected": -0.26480168104171753, "step": 3612 }, { "epoch": 9.891854893908281, "grad_norm": 2.983506202697754, "learning_rate": 5.050684931506849e-07, "log_odds_chosen": 2.7949724197387695, "log_odds_ratio": -0.15179745852947235, "logits/chosen": 0.884872317314148, "logits/rejected": 0.8709267377853394, "logps/chosen": -1.9422639608383179, "logps/rejected": -4.584230899810791, "loss": 0.5382, "nll_loss": 0.5230075716972351, "rewards/accuracies": 1.0, "rewards/chosen": -0.19422639906406403, "rewards/margins": 0.2641966938972473, "rewards/rejected": -0.45842307806015015, "step": 3613 }, { "epoch": 9.894592744695414, "grad_norm": 3.337674140930176, "learning_rate": 5.04931506849315e-07, "log_odds_chosen": 3.9896507263183594, "log_odds_ratio": -0.121512271463871, "logits/chosen": 1.187133550643921, "logits/rejected": 1.2753429412841797, "logps/chosen": -1.7870802879333496, "logps/rejected": -5.590037822723389, "loss": 0.5544, "nll_loss": 0.542276918888092, "rewards/accuracies": 1.0, "rewards/chosen": -0.1787080466747284, "rewards/margins": 0.3802957534790039, "rewards/rejected": -0.5590038299560547, "step": 3614 }, { "epoch": 9.897330595482547, "grad_norm": 3.2304275035858154, "learning_rate": 5.047945205479451e-07, "log_odds_chosen": 3.102184534072876, "log_odds_ratio": -0.09680136293172836, "logits/chosen": 0.630893886089325, "logits/rejected": 0.555309534072876, "logps/chosen": -1.8540120124816895, "logps/rejected": -4.782317161560059, "loss": 0.5449, "nll_loss": 0.5352429151535034, "rewards/accuracies": 1.0, "rewards/chosen": -0.18540120124816895, "rewards/margins": 0.2928304970264435, "rewards/rejected": -0.4782317280769348, "step": 3615 }, { "epoch": 9.900068446269678, "grad_norm": 3.7838072776794434, "learning_rate": 5.046575342465753e-07, "log_odds_chosen": 1.5743556022644043, "log_odds_ratio": -0.3530292809009552, "logits/chosen": 0.6827815175056458, "logits/rejected": 0.6354065537452698, "logps/chosen": -1.4499258995056152, "logps/rejected": -2.873170852661133, "loss": 0.5681, "nll_loss": 0.5327644348144531, "rewards/accuracies": 0.75, "rewards/chosen": -0.14499258995056152, "rewards/margins": 0.14232449233531952, "rewards/rejected": -0.28731709718704224, "step": 3616 }, { "epoch": 9.902806297056811, "grad_norm": 3.6376631259918213, "learning_rate": 5.045205479452054e-07, "log_odds_chosen": 2.2692313194274902, "log_odds_ratio": -0.21219973266124725, "logits/chosen": 1.1552232503890991, "logits/rejected": 1.1629884243011475, "logps/chosen": -2.1913602352142334, "logps/rejected": -4.304553031921387, "loss": 0.6595, "nll_loss": 0.6382670998573303, "rewards/accuracies": 1.0, "rewards/chosen": -0.21913602948188782, "rewards/margins": 0.21131926774978638, "rewards/rejected": -0.4304552972316742, "step": 3617 }, { "epoch": 9.905544147843942, "grad_norm": 3.376816511154175, "learning_rate": 5.043835616438355e-07, "log_odds_chosen": 3.3941922187805176, "log_odds_ratio": -0.20565678179264069, "logits/chosen": 1.2148051261901855, "logits/rejected": 1.270268440246582, "logps/chosen": -2.0968375205993652, "logps/rejected": -5.282623767852783, "loss": 0.5983, "nll_loss": 0.5777395963668823, "rewards/accuracies": 1.0, "rewards/chosen": -0.20968376100063324, "rewards/margins": 0.3185786008834839, "rewards/rejected": -0.5282623767852783, "step": 3618 }, { "epoch": 9.908281998631075, "grad_norm": 3.418666362762451, "learning_rate": 5.042465753424657e-07, "log_odds_chosen": 2.0956811904907227, "log_odds_ratio": -0.2235114425420761, "logits/chosen": 0.9482157230377197, "logits/rejected": 0.9116692543029785, "logps/chosen": -1.6887564659118652, "logps/rejected": -3.632814884185791, "loss": 0.6356, "nll_loss": 0.6132885813713074, "rewards/accuracies": 0.875, "rewards/chosen": -0.16887564957141876, "rewards/margins": 0.19440585374832153, "rewards/rejected": -0.3632814884185791, "step": 3619 }, { "epoch": 9.911019849418206, "grad_norm": 2.8483290672302246, "learning_rate": 5.041095890410959e-07, "log_odds_chosen": 6.498343467712402, "log_odds_ratio": -0.01440982986241579, "logits/chosen": 1.1213653087615967, "logits/rejected": 1.136656641960144, "logps/chosen": -1.3459962606430054, "logps/rejected": -7.475313663482666, "loss": 0.5488, "nll_loss": 0.5473724603652954, "rewards/accuracies": 1.0, "rewards/chosen": -0.13459962606430054, "rewards/margins": 0.6129317879676819, "rewards/rejected": -0.7475314140319824, "step": 3620 }, { "epoch": 9.91375770020534, "grad_norm": 3.5368847846984863, "learning_rate": 5.039726027397259e-07, "log_odds_chosen": 1.9951744079589844, "log_odds_ratio": -0.2963384985923767, "logits/chosen": 0.7785505652427673, "logits/rejected": 0.7341371774673462, "logps/chosen": -1.6685371398925781, "logps/rejected": -3.5255990028381348, "loss": 0.5611, "nll_loss": 0.5314558148384094, "rewards/accuracies": 1.0, "rewards/chosen": -0.16685369610786438, "rewards/margins": 0.18570619821548462, "rewards/rejected": -0.3525599241256714, "step": 3621 }, { "epoch": 9.91649555099247, "grad_norm": 4.119333267211914, "learning_rate": 5.038356164383561e-07, "log_odds_chosen": 1.1525079011917114, "log_odds_ratio": -0.3576880693435669, "logits/chosen": 0.7921464443206787, "logits/rejected": 0.8378702998161316, "logps/chosen": -2.261199951171875, "logps/rejected": -3.318511486053467, "loss": 0.6307, "nll_loss": 0.5949190855026245, "rewards/accuracies": 0.875, "rewards/chosen": -0.2261199951171875, "rewards/margins": 0.10573112964630127, "rewards/rejected": -0.33185112476348877, "step": 3622 }, { "epoch": 9.919233401779604, "grad_norm": 3.664651870727539, "learning_rate": 5.036986301369863e-07, "log_odds_chosen": 1.4178440570831299, "log_odds_ratio": -0.24852526187896729, "logits/chosen": 0.9044561982154846, "logits/rejected": 0.8495393991470337, "logps/chosen": -2.0131943225860596, "logps/rejected": -3.2834625244140625, "loss": 0.604, "nll_loss": 0.5791727304458618, "rewards/accuracies": 1.0, "rewards/chosen": -0.20131941139698029, "rewards/margins": 0.12702684104442596, "rewards/rejected": -0.32834625244140625, "step": 3623 }, { "epoch": 9.921971252566735, "grad_norm": 5.114840984344482, "learning_rate": 5.035616438356163e-07, "log_odds_chosen": 2.655733823776245, "log_odds_ratio": -0.5670244097709656, "logits/chosen": 1.096949577331543, "logits/rejected": 1.0936230421066284, "logps/chosen": -2.2052221298217773, "logps/rejected": -4.718771457672119, "loss": 0.6083, "nll_loss": 0.5515953898429871, "rewards/accuracies": 0.875, "rewards/chosen": -0.2205222100019455, "rewards/margins": 0.2513549327850342, "rewards/rejected": -0.47187718749046326, "step": 3624 }, { "epoch": 9.924709103353868, "grad_norm": 2.9850800037384033, "learning_rate": 5.034246575342465e-07, "log_odds_chosen": 4.260527610778809, "log_odds_ratio": -0.2035113275051117, "logits/chosen": 1.1261098384857178, "logits/rejected": 1.0907766819000244, "logps/chosen": -1.9933077096939087, "logps/rejected": -5.966130256652832, "loss": 0.5623, "nll_loss": 0.5419014692306519, "rewards/accuracies": 0.875, "rewards/chosen": -0.19933077692985535, "rewards/margins": 0.3972822427749634, "rewards/rejected": -0.5966129899024963, "step": 3625 }, { "epoch": 9.927446954140999, "grad_norm": 4.075067043304443, "learning_rate": 5.032876712328768e-07, "log_odds_chosen": 1.927187204360962, "log_odds_ratio": -0.43034589290618896, "logits/chosen": 1.0267446041107178, "logits/rejected": 1.1344695091247559, "logps/chosen": -2.5041708946228027, "logps/rejected": -4.270471572875977, "loss": 0.7499, "nll_loss": 0.7068412899971008, "rewards/accuracies": 0.875, "rewards/chosen": -0.2504171133041382, "rewards/margins": 0.17663007974624634, "rewards/rejected": -0.4270471930503845, "step": 3626 }, { "epoch": 9.930184804928132, "grad_norm": 3.692805767059326, "learning_rate": 5.031506849315069e-07, "log_odds_chosen": 2.0816407203674316, "log_odds_ratio": -0.3081676661968231, "logits/chosen": 0.7936890125274658, "logits/rejected": 0.780714750289917, "logps/chosen": -2.0098981857299805, "logps/rejected": -3.9200356006622314, "loss": 0.5365, "nll_loss": 0.5056529641151428, "rewards/accuracies": 0.875, "rewards/chosen": -0.20098984241485596, "rewards/margins": 0.19101372361183167, "rewards/rejected": -0.3920035660266876, "step": 3627 }, { "epoch": 9.932922655715263, "grad_norm": 3.9226624965667725, "learning_rate": 5.03013698630137e-07, "log_odds_chosen": 2.7724595069885254, "log_odds_ratio": -0.17024967074394226, "logits/chosen": 0.8265644311904907, "logits/rejected": 0.8212810754776001, "logps/chosen": -1.903841495513916, "logps/rejected": -4.429020881652832, "loss": 0.7014, "nll_loss": 0.6843342185020447, "rewards/accuracies": 1.0, "rewards/chosen": -0.1903841495513916, "rewards/margins": 0.252517968416214, "rewards/rejected": -0.4429021179676056, "step": 3628 }, { "epoch": 9.935660506502396, "grad_norm": 3.9002716541290283, "learning_rate": 5.02876712328767e-07, "log_odds_chosen": 2.3076252937316895, "log_odds_ratio": -0.34710371494293213, "logits/chosen": 0.9902564883232117, "logits/rejected": 0.9977271556854248, "logps/chosen": -2.113933563232422, "logps/rejected": -4.3432769775390625, "loss": 0.7288, "nll_loss": 0.6941354870796204, "rewards/accuracies": 0.875, "rewards/chosen": -0.2113933563232422, "rewards/margins": 0.22293433547019958, "rewards/rejected": -0.4343276917934418, "step": 3629 }, { "epoch": 9.938398357289527, "grad_norm": 9.74380111694336, "learning_rate": 5.027397260273973e-07, "log_odds_chosen": 2.019087076187134, "log_odds_ratio": -0.5064513683319092, "logits/chosen": 1.0790292024612427, "logits/rejected": 1.0508880615234375, "logps/chosen": -2.5879862308502197, "logps/rejected": -4.506946086883545, "loss": 0.8486, "nll_loss": 0.7979794144630432, "rewards/accuracies": 0.875, "rewards/chosen": -0.25879862904548645, "rewards/margins": 0.1918959766626358, "rewards/rejected": -0.45069462060928345, "step": 3630 }, { "epoch": 9.94113620807666, "grad_norm": 3.0065927505493164, "learning_rate": 5.026027397260274e-07, "log_odds_chosen": 2.467864990234375, "log_odds_ratio": -0.16625793278217316, "logits/chosen": 1.0092101097106934, "logits/rejected": 0.9811421632766724, "logps/chosen": -1.5827504396438599, "logps/rejected": -3.721369743347168, "loss": 0.4956, "nll_loss": 0.4789711534976959, "rewards/accuracies": 1.0, "rewards/chosen": -0.1582750380039215, "rewards/margins": 0.21386191248893738, "rewards/rejected": -0.3721369504928589, "step": 3631 }, { "epoch": 9.943874058863791, "grad_norm": 3.1491663455963135, "learning_rate": 5.024657534246575e-07, "log_odds_chosen": 4.148811340332031, "log_odds_ratio": -0.12734490633010864, "logits/chosen": 0.810247540473938, "logits/rejected": 0.8723218441009521, "logps/chosen": -1.641373872756958, "logps/rejected": -5.510333061218262, "loss": 0.6559, "nll_loss": 0.6432009339332581, "rewards/accuracies": 1.0, "rewards/chosen": -0.16413739323616028, "rewards/margins": 0.38689592480659485, "rewards/rejected": -0.5510333180427551, "step": 3632 }, { "epoch": 9.946611909650924, "grad_norm": 5.282006740570068, "learning_rate": 5.023287671232877e-07, "log_odds_chosen": 2.822556495666504, "log_odds_ratio": -0.21356281638145447, "logits/chosen": 1.1803340911865234, "logits/rejected": 1.1580904722213745, "logps/chosen": -1.9324421882629395, "logps/rejected": -4.571834087371826, "loss": 0.5923, "nll_loss": 0.5709066390991211, "rewards/accuracies": 0.875, "rewards/chosen": -0.19324421882629395, "rewards/margins": 0.26393914222717285, "rewards/rejected": -0.4571833908557892, "step": 3633 }, { "epoch": 9.949349760438055, "grad_norm": 3.3932595252990723, "learning_rate": 5.021917808219179e-07, "log_odds_chosen": 1.937305212020874, "log_odds_ratio": -0.22952014207839966, "logits/chosen": 0.7989004254341125, "logits/rejected": 0.7721145153045654, "logps/chosen": -1.7175261974334717, "logps/rejected": -3.4327290058135986, "loss": 0.5877, "nll_loss": 0.5647374987602234, "rewards/accuracies": 1.0, "rewards/chosen": -0.17175261676311493, "rewards/margins": 0.17152026295661926, "rewards/rejected": -0.343272864818573, "step": 3634 }, { "epoch": 9.952087611225188, "grad_norm": 3.413708448410034, "learning_rate": 5.020547945205479e-07, "log_odds_chosen": 3.646469831466675, "log_odds_ratio": -0.14408832788467407, "logits/chosen": 1.2357844114303589, "logits/rejected": 1.2974358797073364, "logps/chosen": -2.2093257904052734, "logps/rejected": -5.708454132080078, "loss": 0.5198, "nll_loss": 0.5053662657737732, "rewards/accuracies": 1.0, "rewards/chosen": -0.22093255817890167, "rewards/margins": 0.3499128520488739, "rewards/rejected": -0.5708454251289368, "step": 3635 }, { "epoch": 9.95482546201232, "grad_norm": 3.188917875289917, "learning_rate": 5.019178082191781e-07, "log_odds_chosen": 3.824753999710083, "log_odds_ratio": -0.1206047385931015, "logits/chosen": 1.1512248516082764, "logits/rejected": 1.1180696487426758, "logps/chosen": -1.5302824974060059, "logps/rejected": -5.155098915100098, "loss": 0.5629, "nll_loss": 0.5508750081062317, "rewards/accuracies": 1.0, "rewards/chosen": -0.15302824974060059, "rewards/margins": 0.36248165369033813, "rewards/rejected": -0.5155099630355835, "step": 3636 }, { "epoch": 9.957563312799453, "grad_norm": 3.5885019302368164, "learning_rate": 5.017808219178083e-07, "log_odds_chosen": 2.3076140880584717, "log_odds_ratio": -0.17050470411777496, "logits/chosen": 1.2720322608947754, "logits/rejected": 1.2905160188674927, "logps/chosen": -2.991626262664795, "logps/rejected": -5.198763847351074, "loss": 0.6217, "nll_loss": 0.6046923398971558, "rewards/accuracies": 1.0, "rewards/chosen": -0.2991626262664795, "rewards/margins": 0.2207137644290924, "rewards/rejected": -0.5198763608932495, "step": 3637 }, { "epoch": 9.960301163586585, "grad_norm": 3.8553884029388428, "learning_rate": 5.016438356164383e-07, "log_odds_chosen": 1.6028698682785034, "log_odds_ratio": -0.34077519178390503, "logits/chosen": 1.0474919080734253, "logits/rejected": 1.0139260292053223, "logps/chosen": -2.4412693977355957, "logps/rejected": -3.9095098972320557, "loss": 0.6058, "nll_loss": 0.5716797709465027, "rewards/accuracies": 1.0, "rewards/chosen": -0.24412693083286285, "rewards/margins": 0.14682406187057495, "rewards/rejected": -0.3909509778022766, "step": 3638 }, { "epoch": 9.963039014373717, "grad_norm": 4.483834743499756, "learning_rate": 5.015068493150685e-07, "log_odds_chosen": 2.325775623321533, "log_odds_ratio": -0.25389519333839417, "logits/chosen": 1.1875219345092773, "logits/rejected": 1.117795705795288, "logps/chosen": -2.2978017330169678, "logps/rejected": -4.513136863708496, "loss": 0.6979, "nll_loss": 0.6725484728813171, "rewards/accuracies": 1.0, "rewards/chosen": -0.2297801673412323, "rewards/margins": 0.22153350710868835, "rewards/rejected": -0.45131367444992065, "step": 3639 }, { "epoch": 9.965776865160848, "grad_norm": 3.495950937271118, "learning_rate": 5.013698630136987e-07, "log_odds_chosen": 1.3464220762252808, "log_odds_ratio": -0.33591794967651367, "logits/chosen": 1.0983386039733887, "logits/rejected": 1.1614538431167603, "logps/chosen": -2.5277562141418457, "logps/rejected": -3.805759906768799, "loss": 0.5891, "nll_loss": 0.5555442571640015, "rewards/accuracies": 0.875, "rewards/chosen": -0.2527756094932556, "rewards/margins": 0.12780039012432098, "rewards/rejected": -0.3805760145187378, "step": 3640 }, { "epoch": 9.96851471594798, "grad_norm": 3.326490879058838, "learning_rate": 5.012328767123288e-07, "log_odds_chosen": 1.9173533916473389, "log_odds_ratio": -0.20285235345363617, "logits/chosen": 1.0073922872543335, "logits/rejected": 0.9421449899673462, "logps/chosen": -1.9956457614898682, "logps/rejected": -3.7832109928131104, "loss": 0.5943, "nll_loss": 0.5739883780479431, "rewards/accuracies": 1.0, "rewards/chosen": -0.19956457614898682, "rewards/margins": 0.17875653505325317, "rewards/rejected": -0.37832111120224, "step": 3641 }, { "epoch": 9.971252566735114, "grad_norm": 3.417731285095215, "learning_rate": 5.010958904109589e-07, "log_odds_chosen": 1.6959471702575684, "log_odds_ratio": -0.3194183111190796, "logits/chosen": 0.7974916696548462, "logits/rejected": 0.7290141582489014, "logps/chosen": -1.4835903644561768, "logps/rejected": -2.91306734085083, "loss": 0.5445, "nll_loss": 0.5125969648361206, "rewards/accuracies": 0.875, "rewards/chosen": -0.1483590453863144, "rewards/margins": 0.14294767379760742, "rewards/rejected": -0.291306734085083, "step": 3642 }, { "epoch": 9.973990417522245, "grad_norm": 4.206209182739258, "learning_rate": 5.00958904109589e-07, "log_odds_chosen": 1.9687215089797974, "log_odds_ratio": -0.3281756043434143, "logits/chosen": 0.7433875203132629, "logits/rejected": 0.661579966545105, "logps/chosen": -2.142550468444824, "logps/rejected": -3.995067596435547, "loss": 0.6859, "nll_loss": 0.6531227231025696, "rewards/accuracies": 1.0, "rewards/chosen": -0.21425503492355347, "rewards/margins": 0.18525171279907227, "rewards/rejected": -0.39950674772262573, "step": 3643 }, { "epoch": 9.976728268309378, "grad_norm": 3.7069342136383057, "learning_rate": 5.008219178082192e-07, "log_odds_chosen": 1.7226917743682861, "log_odds_ratio": -0.2612813115119934, "logits/chosen": 1.0475802421569824, "logits/rejected": 1.0329556465148926, "logps/chosen": -1.8210560083389282, "logps/rejected": -3.426680088043213, "loss": 0.5064, "nll_loss": 0.4802832305431366, "rewards/accuracies": 1.0, "rewards/chosen": -0.18210560083389282, "rewards/margins": 0.1605623960494995, "rewards/rejected": -0.34266799688339233, "step": 3644 }, { "epoch": 9.979466119096509, "grad_norm": 4.199769496917725, "learning_rate": 5.006849315068493e-07, "log_odds_chosen": 1.4466935396194458, "log_odds_ratio": -0.3212132453918457, "logits/chosen": 1.2717666625976562, "logits/rejected": 1.2387406826019287, "logps/chosen": -2.4539401531219482, "logps/rejected": -3.790313243865967, "loss": 0.5375, "nll_loss": 0.5053887367248535, "rewards/accuracies": 1.0, "rewards/chosen": -0.2453940212726593, "rewards/margins": 0.13363730907440186, "rewards/rejected": -0.37903136014938354, "step": 3645 }, { "epoch": 9.982203969883642, "grad_norm": 4.364726543426514, "learning_rate": 5.005479452054794e-07, "log_odds_chosen": 3.0469062328338623, "log_odds_ratio": -0.3317505121231079, "logits/chosen": 1.0795682668685913, "logits/rejected": 1.1187790632247925, "logps/chosen": -2.20497465133667, "logps/rejected": -5.0699872970581055, "loss": 0.7383, "nll_loss": 0.705094039440155, "rewards/accuracies": 0.875, "rewards/chosen": -0.2204974740743637, "rewards/margins": 0.28650131821632385, "rewards/rejected": -0.5069987773895264, "step": 3646 }, { "epoch": 9.984941820670773, "grad_norm": 3.567373752593994, "learning_rate": 5.004109589041096e-07, "log_odds_chosen": 2.237593173980713, "log_odds_ratio": -0.17550410330295563, "logits/chosen": 0.56562340259552, "logits/rejected": 0.4837728440761566, "logps/chosen": -1.4738211631774902, "logps/rejected": -3.4549827575683594, "loss": 0.5651, "nll_loss": 0.5475007891654968, "rewards/accuracies": 1.0, "rewards/chosen": -0.14738211035728455, "rewards/margins": 0.19811615347862244, "rewards/rejected": -0.345498263835907, "step": 3647 }, { "epoch": 9.987679671457906, "grad_norm": 3.858222246170044, "learning_rate": 5.002739726027398e-07, "log_odds_chosen": 3.2092549800872803, "log_odds_ratio": -0.29728445410728455, "logits/chosen": 1.001766324043274, "logits/rejected": 0.986621618270874, "logps/chosen": -2.2573230266571045, "logps/rejected": -5.274971961975098, "loss": 0.6346, "nll_loss": 0.6048979759216309, "rewards/accuracies": 0.75, "rewards/chosen": -0.22573229670524597, "rewards/margins": 0.30176490545272827, "rewards/rejected": -0.5274971723556519, "step": 3648 }, { "epoch": 9.990417522245037, "grad_norm": 3.221330165863037, "learning_rate": 5.001369863013698e-07, "log_odds_chosen": 2.0729990005493164, "log_odds_ratio": -0.21846133470535278, "logits/chosen": 0.7658221125602722, "logits/rejected": 0.6956538558006287, "logps/chosen": -2.059462547302246, "logps/rejected": -3.9777607917785645, "loss": 0.5743, "nll_loss": 0.5524477362632751, "rewards/accuracies": 1.0, "rewards/chosen": -0.20594629645347595, "rewards/margins": 0.19182980060577393, "rewards/rejected": -0.3977760672569275, "step": 3649 }, { "epoch": 9.99315537303217, "grad_norm": 6.268399715423584, "learning_rate": 5e-07, "log_odds_chosen": 2.044067144393921, "log_odds_ratio": -0.215678870677948, "logits/chosen": 1.4214736223220825, "logits/rejected": 1.397316336631775, "logps/chosen": -2.535433769226074, "logps/rejected": -4.42716646194458, "loss": 0.6146, "nll_loss": 0.5930063724517822, "rewards/accuracies": 0.875, "rewards/chosen": -0.2535434067249298, "rewards/margins": 0.18917325139045715, "rewards/rejected": -0.44271665811538696, "step": 3650 }, { "epoch": 9.995893223819301, "grad_norm": 3.4896328449249268, "learning_rate": 4.998630136986301e-07, "log_odds_chosen": 4.49727725982666, "log_odds_ratio": -0.16000749170780182, "logits/chosen": 0.9797465801239014, "logits/rejected": 0.9347633719444275, "logps/chosen": -1.7366900444030762, "logps/rejected": -6.0504865646362305, "loss": 0.5798, "nll_loss": 0.5637908577919006, "rewards/accuracies": 1.0, "rewards/chosen": -0.1736690104007721, "rewards/margins": 0.43137961626052856, "rewards/rejected": -0.605048656463623, "step": 3651 }, { "epoch": 9.998631074606434, "grad_norm": 3.6559839248657227, "learning_rate": 4.997260273972603e-07, "log_odds_chosen": 3.271646738052368, "log_odds_ratio": -0.22853142023086548, "logits/chosen": 1.0980157852172852, "logits/rejected": 1.1625770330429077, "logps/chosen": -2.204958438873291, "logps/rejected": -5.369739055633545, "loss": 0.5828, "nll_loss": 0.5599503517150879, "rewards/accuracies": 1.0, "rewards/chosen": -0.22049583494663239, "rewards/margins": 0.3164781332015991, "rewards/rejected": -0.5369739532470703, "step": 3652 }, { "epoch": 10.001368925393566, "grad_norm": 3.760836124420166, "learning_rate": 4.995890410958904e-07, "log_odds_chosen": 1.9823005199432373, "log_odds_ratio": -0.23749299347400665, "logits/chosen": 0.8700345754623413, "logits/rejected": 0.864855170249939, "logps/chosen": -2.005676746368408, "logps/rejected": -3.842752695083618, "loss": 0.5481, "nll_loss": 0.524315595626831, "rewards/accuracies": 1.0, "rewards/chosen": -0.20056767761707306, "rewards/margins": 0.183707594871521, "rewards/rejected": -0.38427528738975525, "step": 3653 }, { "epoch": 10.004106776180699, "grad_norm": 5.778580665588379, "learning_rate": 4.994520547945205e-07, "log_odds_chosen": 1.411260724067688, "log_odds_ratio": -0.24516192078590393, "logits/chosen": 0.8149799704551697, "logits/rejected": 0.7955653667449951, "logps/chosen": -2.302628993988037, "logps/rejected": -3.5686073303222656, "loss": 0.6524, "nll_loss": 0.6278387904167175, "rewards/accuracies": 1.0, "rewards/chosen": -0.2302629053592682, "rewards/margins": 0.1265978068113327, "rewards/rejected": -0.3568606972694397, "step": 3654 }, { "epoch": 10.00684462696783, "grad_norm": 3.328434705734253, "learning_rate": 4.993150684931507e-07, "log_odds_chosen": 3.4625051021575928, "log_odds_ratio": -0.16048768162727356, "logits/chosen": 0.9543395042419434, "logits/rejected": 0.8759682774543762, "logps/chosen": -1.726739764213562, "logps/rejected": -4.978488922119141, "loss": 0.6748, "nll_loss": 0.6587475538253784, "rewards/accuracies": 1.0, "rewards/chosen": -0.17267397046089172, "rewards/margins": 0.32517489790916443, "rewards/rejected": -0.49784886837005615, "step": 3655 }, { "epoch": 10.009582477754963, "grad_norm": 3.772782325744629, "learning_rate": 4.991780821917808e-07, "log_odds_chosen": 1.4170411825180054, "log_odds_ratio": -0.3707368075847626, "logits/chosen": 0.8651952743530273, "logits/rejected": 0.793481707572937, "logps/chosen": -1.4430633783340454, "logps/rejected": -2.710662364959717, "loss": 0.5729, "nll_loss": 0.5358708500862122, "rewards/accuracies": 0.875, "rewards/chosen": -0.14430634677410126, "rewards/margins": 0.12675991654396057, "rewards/rejected": -0.27106624841690063, "step": 3656 }, { "epoch": 10.012320328542094, "grad_norm": 3.8677830696105957, "learning_rate": 4.990410958904109e-07, "log_odds_chosen": 1.6066384315490723, "log_odds_ratio": -0.27654048800468445, "logits/chosen": 1.2080602645874023, "logits/rejected": 1.229773998260498, "logps/chosen": -2.091916561126709, "logps/rejected": -3.602454423904419, "loss": 0.6114, "nll_loss": 0.5837291479110718, "rewards/accuracies": 1.0, "rewards/chosen": -0.20919166505336761, "rewards/margins": 0.1510538011789322, "rewards/rejected": -0.3602454662322998, "step": 3657 }, { "epoch": 10.015058179329227, "grad_norm": 3.271313428878784, "learning_rate": 4.989041095890411e-07, "log_odds_chosen": 4.0860185623168945, "log_odds_ratio": -0.12518030405044556, "logits/chosen": 1.026004433631897, "logits/rejected": 1.044234037399292, "logps/chosen": -2.1970925331115723, "logps/rejected": -6.15886926651001, "loss": 0.6011, "nll_loss": 0.5885813236236572, "rewards/accuracies": 1.0, "rewards/chosen": -0.21970926225185394, "rewards/margins": 0.3961777091026306, "rewards/rejected": -0.615886926651001, "step": 3658 }, { "epoch": 10.017796030116358, "grad_norm": 3.170527696609497, "learning_rate": 4.987671232876712e-07, "log_odds_chosen": 2.2981808185577393, "log_odds_ratio": -0.15109792351722717, "logits/chosen": 0.7713737487792969, "logits/rejected": 0.6888392567634583, "logps/chosen": -1.9052852392196655, "logps/rejected": -3.998337745666504, "loss": 0.58, "nll_loss": 0.564857006072998, "rewards/accuracies": 1.0, "rewards/chosen": -0.19052854180335999, "rewards/margins": 0.20930524170398712, "rewards/rejected": -0.3998337388038635, "step": 3659 }, { "epoch": 10.020533880903491, "grad_norm": 4.444452285766602, "learning_rate": 4.986301369863014e-07, "log_odds_chosen": 1.3725993633270264, "log_odds_ratio": -0.4051457941532135, "logits/chosen": 1.046795129776001, "logits/rejected": 0.9953365921974182, "logps/chosen": -2.156108856201172, "logps/rejected": -3.417351722717285, "loss": 0.5692, "nll_loss": 0.528694212436676, "rewards/accuracies": 0.875, "rewards/chosen": -0.21561092138290405, "rewards/margins": 0.1261242777109146, "rewards/rejected": -0.34173518419265747, "step": 3660 }, { "epoch": 10.023271731690622, "grad_norm": 3.8699588775634766, "learning_rate": 4.984931506849315e-07, "log_odds_chosen": 1.2662107944488525, "log_odds_ratio": -0.3147745430469513, "logits/chosen": 0.9128319025039673, "logits/rejected": 0.8266900777816772, "logps/chosen": -2.087584972381592, "logps/rejected": -3.239684581756592, "loss": 0.5248, "nll_loss": 0.49333620071411133, "rewards/accuracies": 1.0, "rewards/chosen": -0.20875850319862366, "rewards/margins": 0.11520999670028687, "rewards/rejected": -0.3239684998989105, "step": 3661 }, { "epoch": 10.026009582477755, "grad_norm": 3.0374767780303955, "learning_rate": 4.983561643835616e-07, "log_odds_chosen": 2.9816577434539795, "log_odds_ratio": -0.1338081955909729, "logits/chosen": 0.8570653200149536, "logits/rejected": 0.7589281797409058, "logps/chosen": -1.4421374797821045, "logps/rejected": -4.12469482421875, "loss": 0.5478, "nll_loss": 0.5344657301902771, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442137360572815, "rewards/margins": 0.2682557702064514, "rewards/rejected": -0.4124695062637329, "step": 3662 }, { "epoch": 10.028747433264886, "grad_norm": 2.9595797061920166, "learning_rate": 4.982191780821918e-07, "log_odds_chosen": 2.5467371940612793, "log_odds_ratio": -0.25303858518600464, "logits/chosen": 0.7919661998748779, "logits/rejected": 0.7940852642059326, "logps/chosen": -1.8308889865875244, "logps/rejected": -4.182004928588867, "loss": 0.6089, "nll_loss": 0.5836203694343567, "rewards/accuracies": 0.875, "rewards/chosen": -0.18308891355991364, "rewards/margins": 0.2351115643978119, "rewards/rejected": -0.4182004928588867, "step": 3663 }, { "epoch": 10.03148528405202, "grad_norm": 4.322836399078369, "learning_rate": 4.980821917808219e-07, "log_odds_chosen": 1.328794240951538, "log_odds_ratio": -0.3870953321456909, "logits/chosen": 0.8063796162605286, "logits/rejected": 0.8061426281929016, "logps/chosen": -2.307936668395996, "logps/rejected": -3.509594440460205, "loss": 0.5502, "nll_loss": 0.5114719867706299, "rewards/accuracies": 0.875, "rewards/chosen": -0.23079369962215424, "rewards/margins": 0.12016578018665314, "rewards/rejected": -0.3509594798088074, "step": 3664 }, { "epoch": 10.03422313483915, "grad_norm": 4.413971424102783, "learning_rate": 4.97945205479452e-07, "log_odds_chosen": 1.5086134672164917, "log_odds_ratio": -0.45494723320007324, "logits/chosen": 1.0164880752563477, "logits/rejected": 0.9840826988220215, "logps/chosen": -2.0263843536376953, "logps/rejected": -3.46274995803833, "loss": 0.615, "nll_loss": 0.5695505142211914, "rewards/accuracies": 0.875, "rewards/chosen": -0.2026384174823761, "rewards/margins": 0.1436365693807602, "rewards/rejected": -0.3462749719619751, "step": 3665 }, { "epoch": 10.036960985626283, "grad_norm": 3.308225154876709, "learning_rate": 4.978082191780822e-07, "log_odds_chosen": 3.3441970348358154, "log_odds_ratio": -0.2766212821006775, "logits/chosen": 1.0580127239227295, "logits/rejected": 1.049584984779358, "logps/chosen": -2.2235403060913086, "logps/rejected": -5.488234519958496, "loss": 0.6071, "nll_loss": 0.5793917179107666, "rewards/accuracies": 0.875, "rewards/chosen": -0.22235403954982758, "rewards/margins": 0.32646942138671875, "rewards/rejected": -0.5488234758377075, "step": 3666 }, { "epoch": 10.039698836413416, "grad_norm": 3.5804409980773926, "learning_rate": 4.976712328767123e-07, "log_odds_chosen": 1.8458327054977417, "log_odds_ratio": -0.2123393565416336, "logits/chosen": 0.87104731798172, "logits/rejected": 0.8470720052719116, "logps/chosen": -1.869626760482788, "logps/rejected": -3.546881914138794, "loss": 0.5375, "nll_loss": 0.5162769556045532, "rewards/accuracies": 1.0, "rewards/chosen": -0.18696269392967224, "rewards/margins": 0.16772548854351044, "rewards/rejected": -0.3546881675720215, "step": 3667 }, { "epoch": 10.042436687200547, "grad_norm": 3.1914453506469727, "learning_rate": 4.975342465753424e-07, "log_odds_chosen": 2.503945827484131, "log_odds_ratio": -0.24315635859966278, "logits/chosen": 0.8182876110076904, "logits/rejected": 0.8633109927177429, "logps/chosen": -1.9031901359558105, "logps/rejected": -4.28782320022583, "loss": 0.7267, "nll_loss": 0.702422559261322, "rewards/accuracies": 1.0, "rewards/chosen": -0.1903190165758133, "rewards/margins": 0.23846334218978882, "rewards/rejected": -0.4287823736667633, "step": 3668 }, { "epoch": 10.04517453798768, "grad_norm": 3.145888090133667, "learning_rate": 4.973972602739726e-07, "log_odds_chosen": 2.2122035026550293, "log_odds_ratio": -0.16110168397426605, "logits/chosen": 0.9485386610031128, "logits/rejected": 0.8913755416870117, "logps/chosen": -1.7983216047286987, "logps/rejected": -3.8395838737487793, "loss": 0.5386, "nll_loss": 0.5224783420562744, "rewards/accuracies": 1.0, "rewards/chosen": -0.17983214557170868, "rewards/margins": 0.2041262537240982, "rewards/rejected": -0.3839583992958069, "step": 3669 }, { "epoch": 10.047912388774812, "grad_norm": 3.256784439086914, "learning_rate": 4.972602739726027e-07, "log_odds_chosen": 2.1071698665618896, "log_odds_ratio": -0.19703829288482666, "logits/chosen": 0.7886616587638855, "logits/rejected": 0.7459832429885864, "logps/chosen": -1.6821300983428955, "logps/rejected": -3.6001555919647217, "loss": 0.5539, "nll_loss": 0.5342183113098145, "rewards/accuracies": 1.0, "rewards/chosen": -0.16821300983428955, "rewards/margins": 0.19180257618427277, "rewards/rejected": -0.3600156009197235, "step": 3670 }, { "epoch": 10.050650239561945, "grad_norm": 3.251676082611084, "learning_rate": 4.971232876712328e-07, "log_odds_chosen": 3.3846395015716553, "log_odds_ratio": -0.08658026158809662, "logits/chosen": 0.9262744188308716, "logits/rejected": 0.9354678392410278, "logps/chosen": -1.4407024383544922, "logps/rejected": -4.547788143157959, "loss": 0.5394, "nll_loss": 0.5306941866874695, "rewards/accuracies": 1.0, "rewards/chosen": -0.14407023787498474, "rewards/margins": 0.31070855259895325, "rewards/rejected": -0.454778790473938, "step": 3671 }, { "epoch": 10.053388090349076, "grad_norm": 5.696191787719727, "learning_rate": 4.96986301369863e-07, "log_odds_chosen": 2.746797561645508, "log_odds_ratio": -0.6048687696456909, "logits/chosen": 0.996762752532959, "logits/rejected": 0.9933710694313049, "logps/chosen": -2.5638608932495117, "logps/rejected": -5.218021392822266, "loss": 0.7054, "nll_loss": 0.6449443697929382, "rewards/accuracies": 0.875, "rewards/chosen": -0.2563861012458801, "rewards/margins": 0.26541608572006226, "rewards/rejected": -0.5218021869659424, "step": 3672 }, { "epoch": 10.056125941136209, "grad_norm": 7.317429542541504, "learning_rate": 4.968493150684931e-07, "log_odds_chosen": 1.212632179260254, "log_odds_ratio": -0.5447487831115723, "logits/chosen": 0.9335672855377197, "logits/rejected": 0.9374703168869019, "logps/chosen": -2.6616692543029785, "logps/rejected": -3.7241058349609375, "loss": 0.6165, "nll_loss": 0.5619962811470032, "rewards/accuracies": 0.875, "rewards/chosen": -0.26616695523262024, "rewards/margins": 0.10624365508556366, "rewards/rejected": -0.3724105954170227, "step": 3673 }, { "epoch": 10.05886379192334, "grad_norm": 7.150577068328857, "learning_rate": 4.967123287671233e-07, "log_odds_chosen": 2.5398831367492676, "log_odds_ratio": -0.21616481244564056, "logits/chosen": 0.9111098051071167, "logits/rejected": 0.8929134607315063, "logps/chosen": -1.773428201675415, "logps/rejected": -3.9988279342651367, "loss": 0.6481, "nll_loss": 0.6265150308609009, "rewards/accuracies": 0.875, "rewards/chosen": -0.17734283208847046, "rewards/margins": 0.22253994643688202, "rewards/rejected": -0.39988279342651367, "step": 3674 }, { "epoch": 10.061601642710473, "grad_norm": 3.0387139320373535, "learning_rate": 4.965753424657534e-07, "log_odds_chosen": 2.5877797603607178, "log_odds_ratio": -0.2854338586330414, "logits/chosen": 1.0365787744522095, "logits/rejected": 0.9861681461334229, "logps/chosen": -1.5156354904174805, "logps/rejected": -3.92805552482605, "loss": 0.599, "nll_loss": 0.5704232454299927, "rewards/accuracies": 1.0, "rewards/chosen": -0.15156354010105133, "rewards/margins": 0.24124205112457275, "rewards/rejected": -0.3928056061267853, "step": 3675 }, { "epoch": 10.064339493497604, "grad_norm": 5.153567314147949, "learning_rate": 4.964383561643836e-07, "log_odds_chosen": 2.9443416595458984, "log_odds_ratio": -0.3940584361553192, "logits/chosen": 1.0438623428344727, "logits/rejected": 1.0385760068893433, "logps/chosen": -2.7411646842956543, "logps/rejected": -5.491929054260254, "loss": 0.7054, "nll_loss": 0.6660032272338867, "rewards/accuracies": 0.75, "rewards/chosen": -0.27411648631095886, "rewards/margins": 0.27507641911506653, "rewards/rejected": -0.5491929054260254, "step": 3676 }, { "epoch": 10.067077344284737, "grad_norm": 6.288089752197266, "learning_rate": 4.963013698630137e-07, "log_odds_chosen": 1.250312328338623, "log_odds_ratio": -0.6402722597122192, "logits/chosen": 0.9251623749732971, "logits/rejected": 0.887745201587677, "logps/chosen": -2.573518753051758, "logps/rejected": -3.7356338500976562, "loss": 0.6695, "nll_loss": 0.6054337620735168, "rewards/accuracies": 0.875, "rewards/chosen": -0.2573518753051758, "rewards/margins": 0.11621153354644775, "rewards/rejected": -0.37356340885162354, "step": 3677 }, { "epoch": 10.069815195071868, "grad_norm": 5.617197036743164, "learning_rate": 4.961643835616438e-07, "log_odds_chosen": 1.5331159830093384, "log_odds_ratio": -0.3986217677593231, "logits/chosen": 0.7685673832893372, "logits/rejected": 0.7284992337226868, "logps/chosen": -2.00302791595459, "logps/rejected": -3.415966510772705, "loss": 0.7265, "nll_loss": 0.6866739392280579, "rewards/accuracies": 0.875, "rewards/chosen": -0.20030280947685242, "rewards/margins": 0.14129383862018585, "rewards/rejected": -0.34159666299819946, "step": 3678 }, { "epoch": 10.072553045859001, "grad_norm": 3.624382257461548, "learning_rate": 4.960273972602739e-07, "log_odds_chosen": 3.1145401000976562, "log_odds_ratio": -0.2515639364719391, "logits/chosen": 0.6158388257026672, "logits/rejected": 0.6135540008544922, "logps/chosen": -1.9063122272491455, "logps/rejected": -4.884623050689697, "loss": 0.5732, "nll_loss": 0.5480359792709351, "rewards/accuracies": 0.875, "rewards/chosen": -0.1906312108039856, "rewards/margins": 0.29783111810684204, "rewards/rejected": -0.48846229910850525, "step": 3679 }, { "epoch": 10.075290896646132, "grad_norm": 3.4874799251556396, "learning_rate": 4.958904109589041e-07, "log_odds_chosen": 2.120187997817993, "log_odds_ratio": -0.182794988155365, "logits/chosen": 0.8798202276229858, "logits/rejected": 0.850597620010376, "logps/chosen": -1.7066149711608887, "logps/rejected": -3.598945140838623, "loss": 0.5372, "nll_loss": 0.5188882946968079, "rewards/accuracies": 1.0, "rewards/chosen": -0.17066149413585663, "rewards/margins": 0.18923304975032806, "rewards/rejected": -0.3598945736885071, "step": 3680 }, { "epoch": 10.078028747433265, "grad_norm": 6.663425445556641, "learning_rate": 4.957534246575342e-07, "log_odds_chosen": 0.9013068675994873, "log_odds_ratio": -0.5443174242973328, "logits/chosen": 0.9235396981239319, "logits/rejected": 0.959128201007843, "logps/chosen": -2.9593536853790283, "logps/rejected": -3.79830002784729, "loss": 0.6111, "nll_loss": 0.556688129901886, "rewards/accuracies": 0.875, "rewards/chosen": -0.29593539237976074, "rewards/margins": 0.08389462530612946, "rewards/rejected": -0.379830002784729, "step": 3681 }, { "epoch": 10.080766598220396, "grad_norm": 7.124865531921387, "learning_rate": 4.956164383561643e-07, "log_odds_chosen": 1.0508109331130981, "log_odds_ratio": -0.66920006275177, "logits/chosen": 1.003671646118164, "logits/rejected": 0.954833984375, "logps/chosen": -2.4960439205169678, "logps/rejected": -3.3527557849884033, "loss": 0.6041, "nll_loss": 0.5371633172035217, "rewards/accuracies": 0.875, "rewards/chosen": -0.24960438907146454, "rewards/margins": 0.08567120134830475, "rewards/rejected": -0.3352755904197693, "step": 3682 }, { "epoch": 10.08350444900753, "grad_norm": 4.129059791564941, "learning_rate": 4.954794520547945e-07, "log_odds_chosen": 2.6003620624542236, "log_odds_ratio": -0.40289875864982605, "logits/chosen": 0.8896070718765259, "logits/rejected": 0.8829156160354614, "logps/chosen": -2.2576546669006348, "logps/rejected": -4.609234809875488, "loss": 0.6998, "nll_loss": 0.6594989895820618, "rewards/accuracies": 0.75, "rewards/chosen": -0.22576548159122467, "rewards/margins": 0.2351580411195755, "rewards/rejected": -0.46092352271080017, "step": 3683 }, { "epoch": 10.08624229979466, "grad_norm": 7.614598274230957, "learning_rate": 4.953424657534246e-07, "log_odds_chosen": 1.8973790407180786, "log_odds_ratio": -0.6085788607597351, "logits/chosen": 1.0131410360336304, "logits/rejected": 1.0238007307052612, "logps/chosen": -2.9977433681488037, "logps/rejected": -4.669576644897461, "loss": 0.6964, "nll_loss": 0.635547399520874, "rewards/accuracies": 0.5, "rewards/chosen": -0.29977431893348694, "rewards/margins": 0.16718333959579468, "rewards/rejected": -0.466957688331604, "step": 3684 }, { "epoch": 10.088980150581794, "grad_norm": 3.357591390609741, "learning_rate": 4.952054794520547e-07, "log_odds_chosen": 1.4845142364501953, "log_odds_ratio": -0.3131633400917053, "logits/chosen": 0.8107723593711853, "logits/rejected": 0.7989535927772522, "logps/chosen": -1.8271384239196777, "logps/rejected": -3.149240016937256, "loss": 0.4897, "nll_loss": 0.45839959383010864, "rewards/accuracies": 0.875, "rewards/chosen": -0.18271386623382568, "rewards/margins": 0.13221018016338348, "rewards/rejected": -0.3149240016937256, "step": 3685 }, { "epoch": 10.091718001368925, "grad_norm": 4.6842756271362305, "learning_rate": 4.950684931506849e-07, "log_odds_chosen": 2.2544126510620117, "log_odds_ratio": -0.41155070066452026, "logits/chosen": 0.8172765970230103, "logits/rejected": 0.8345030546188354, "logps/chosen": -2.1778929233551025, "logps/rejected": -4.249393463134766, "loss": 0.65, "nll_loss": 0.6088389158248901, "rewards/accuracies": 0.875, "rewards/chosen": -0.21778929233551025, "rewards/margins": 0.20715001225471497, "rewards/rejected": -0.4249393343925476, "step": 3686 }, { "epoch": 10.094455852156058, "grad_norm": 3.518716335296631, "learning_rate": 4.94931506849315e-07, "log_odds_chosen": 2.991748571395874, "log_odds_ratio": -0.36927762627601624, "logits/chosen": 0.685447096824646, "logits/rejected": 0.6927957534790039, "logps/chosen": -2.3199868202209473, "logps/rejected": -5.178267478942871, "loss": 0.705, "nll_loss": 0.6680965423583984, "rewards/accuracies": 0.75, "rewards/chosen": -0.23199869692325592, "rewards/margins": 0.28582799434661865, "rewards/rejected": -0.5178266763687134, "step": 3687 }, { "epoch": 10.097193702943189, "grad_norm": 3.273803472518921, "learning_rate": 4.947945205479452e-07, "log_odds_chosen": 3.154733896255493, "log_odds_ratio": -0.20590941607952118, "logits/chosen": 0.9978571534156799, "logits/rejected": 0.95878666639328, "logps/chosen": -1.385969877243042, "logps/rejected": -4.2474365234375, "loss": 0.5544, "nll_loss": 0.533814549446106, "rewards/accuracies": 1.0, "rewards/chosen": -0.13859699666500092, "rewards/margins": 0.2861466705799103, "rewards/rejected": -0.4247436821460724, "step": 3688 }, { "epoch": 10.099931553730322, "grad_norm": 3.501477003097534, "learning_rate": 4.946575342465753e-07, "log_odds_chosen": 1.6488947868347168, "log_odds_ratio": -0.3226079046726227, "logits/chosen": 0.7923022508621216, "logits/rejected": 0.7519515752792358, "logps/chosen": -2.2383873462677, "logps/rejected": -3.684113025665283, "loss": 0.5958, "nll_loss": 0.5635785460472107, "rewards/accuracies": 0.75, "rewards/chosen": -0.22383873164653778, "rewards/margins": 0.14457260072231293, "rewards/rejected": -0.3684113323688507, "step": 3689 }, { "epoch": 10.102669404517453, "grad_norm": 4.302150726318359, "learning_rate": 4.945205479452055e-07, "log_odds_chosen": 3.2268619537353516, "log_odds_ratio": -0.0735766738653183, "logits/chosen": 1.3352421522140503, "logits/rejected": 1.4108171463012695, "logps/chosen": -2.0789661407470703, "logps/rejected": -5.166614532470703, "loss": 0.5797, "nll_loss": 0.5723201036453247, "rewards/accuracies": 1.0, "rewards/chosen": -0.2078966200351715, "rewards/margins": 0.30876487493515015, "rewards/rejected": -0.516661524772644, "step": 3690 }, { "epoch": 10.105407255304586, "grad_norm": 8.925521850585938, "learning_rate": 4.943835616438356e-07, "log_odds_chosen": 1.8651232719421387, "log_odds_ratio": -0.6259769201278687, "logits/chosen": 1.1660432815551758, "logits/rejected": 1.16680908203125, "logps/chosen": -2.6690244674682617, "logps/rejected": -4.327526092529297, "loss": 0.6293, "nll_loss": 0.5666841268539429, "rewards/accuracies": 0.625, "rewards/chosen": -0.26690244674682617, "rewards/margins": 0.16585016250610352, "rewards/rejected": -0.4327526390552521, "step": 3691 }, { "epoch": 10.108145106091717, "grad_norm": 3.4681694507598877, "learning_rate": 4.942465753424657e-07, "log_odds_chosen": 1.3828011751174927, "log_odds_ratio": -0.3026697337627411, "logits/chosen": 0.992318868637085, "logits/rejected": 1.0408681631088257, "logps/chosen": -2.036104440689087, "logps/rejected": -3.3160629272460938, "loss": 0.5857, "nll_loss": 0.5553876757621765, "rewards/accuracies": 1.0, "rewards/chosen": -0.20361045002937317, "rewards/margins": 0.12799584865570068, "rewards/rejected": -0.33160632848739624, "step": 3692 }, { "epoch": 10.11088295687885, "grad_norm": 3.545454263687134, "learning_rate": 4.941095890410958e-07, "log_odds_chosen": 1.2930010557174683, "log_odds_ratio": -0.2726480960845947, "logits/chosen": 0.8474820256233215, "logits/rejected": 0.7539060115814209, "logps/chosen": -1.805873990058899, "logps/rejected": -2.9503121376037598, "loss": 0.6326, "nll_loss": 0.6053375601768494, "rewards/accuracies": 1.0, "rewards/chosen": -0.18058741092681885, "rewards/margins": 0.114443838596344, "rewards/rejected": -0.29503124952316284, "step": 3693 }, { "epoch": 10.113620807665983, "grad_norm": 5.938095569610596, "learning_rate": 4.93972602739726e-07, "log_odds_chosen": 1.5821348428726196, "log_odds_ratio": -0.36681365966796875, "logits/chosen": 1.0605195760726929, "logits/rejected": 1.0166072845458984, "logps/chosen": -2.2125027179718018, "logps/rejected": -3.7050137519836426, "loss": 0.6043, "nll_loss": 0.5676050186157227, "rewards/accuracies": 0.75, "rewards/chosen": -0.2212502807378769, "rewards/margins": 0.1492510735988617, "rewards/rejected": -0.3705013394355774, "step": 3694 }, { "epoch": 10.116358658453114, "grad_norm": 4.257005214691162, "learning_rate": 4.938356164383561e-07, "log_odds_chosen": 2.6196320056915283, "log_odds_ratio": -0.3562214970588684, "logits/chosen": 1.0602699518203735, "logits/rejected": 1.0730090141296387, "logps/chosen": -2.33066463470459, "logps/rejected": -4.865866184234619, "loss": 0.6053, "nll_loss": 0.5696581602096558, "rewards/accuracies": 0.875, "rewards/chosen": -0.23306646943092346, "rewards/margins": 0.2535201907157898, "rewards/rejected": -0.48658663034439087, "step": 3695 }, { "epoch": 10.119096509240247, "grad_norm": 9.300966262817383, "learning_rate": 4.936986301369862e-07, "log_odds_chosen": 2.0921719074249268, "log_odds_ratio": -0.410478800535202, "logits/chosen": 0.8945525884628296, "logits/rejected": 0.8023428916931152, "logps/chosen": -1.807896614074707, "logps/rejected": -3.653421640396118, "loss": 0.6991, "nll_loss": 0.6580792665481567, "rewards/accuracies": 0.875, "rewards/chosen": -0.18078966438770294, "rewards/margins": 0.18455252051353455, "rewards/rejected": -0.3653421998023987, "step": 3696 }, { "epoch": 10.121834360027378, "grad_norm": 4.381283283233643, "learning_rate": 4.935616438356164e-07, "log_odds_chosen": 2.267836570739746, "log_odds_ratio": -0.4448767900466919, "logits/chosen": 0.9467998743057251, "logits/rejected": 0.9654380083084106, "logps/chosen": -2.36006236076355, "logps/rejected": -4.495285987854004, "loss": 0.5886, "nll_loss": 0.5441441535949707, "rewards/accuracies": 0.875, "rewards/chosen": -0.2360062599182129, "rewards/margins": 0.21352235972881317, "rewards/rejected": -0.44952860474586487, "step": 3697 }, { "epoch": 10.124572210814511, "grad_norm": 3.4921271800994873, "learning_rate": 4.934246575342465e-07, "log_odds_chosen": 2.363211154937744, "log_odds_ratio": -0.23084765672683716, "logits/chosen": 0.9705031514167786, "logits/rejected": 1.0053342580795288, "logps/chosen": -1.9771509170532227, "logps/rejected": -4.198599815368652, "loss": 0.6443, "nll_loss": 0.6212165951728821, "rewards/accuracies": 1.0, "rewards/chosen": -0.19771508872509003, "rewards/margins": 0.22214487195014954, "rewards/rejected": -0.41985997557640076, "step": 3698 }, { "epoch": 10.127310061601642, "grad_norm": 3.317187547683716, "learning_rate": 4.932876712328766e-07, "log_odds_chosen": 2.2630276679992676, "log_odds_ratio": -0.18108902871608734, "logits/chosen": 0.9110220670700073, "logits/rejected": 0.8918670415878296, "logps/chosen": -1.4399056434631348, "logps/rejected": -3.4813637733459473, "loss": 0.5856, "nll_loss": 0.5674693584442139, "rewards/accuracies": 1.0, "rewards/chosen": -0.14399056136608124, "rewards/margins": 0.20414584875106812, "rewards/rejected": -0.34813639521598816, "step": 3699 }, { "epoch": 10.130047912388775, "grad_norm": 3.9993324279785156, "learning_rate": 4.931506849315068e-07, "log_odds_chosen": 3.175534248352051, "log_odds_ratio": -0.13452237844467163, "logits/chosen": 1.195146083831787, "logits/rejected": 1.1942267417907715, "logps/chosen": -1.9785306453704834, "logps/rejected": -4.987725734710693, "loss": 0.5956, "nll_loss": 0.5821741819381714, "rewards/accuracies": 1.0, "rewards/chosen": -0.19785307347774506, "rewards/margins": 0.3009195327758789, "rewards/rejected": -0.4987725615501404, "step": 3700 }, { "epoch": 10.132785763175907, "grad_norm": 2.789355754852295, "learning_rate": 4.930136986301369e-07, "log_odds_chosen": 5.693426132202148, "log_odds_ratio": -0.07135959714651108, "logits/chosen": 0.7371798753738403, "logits/rejected": 0.6662510633468628, "logps/chosen": -2.096226692199707, "logps/rejected": -7.589019298553467, "loss": 0.7054, "nll_loss": 0.6983098983764648, "rewards/accuracies": 1.0, "rewards/chosen": -0.20962268114089966, "rewards/margins": 0.5492793321609497, "rewards/rejected": -0.7589020133018494, "step": 3701 }, { "epoch": 10.13552361396304, "grad_norm": 5.949283599853516, "learning_rate": 4.928767123287671e-07, "log_odds_chosen": 1.6431901454925537, "log_odds_ratio": -0.39995038509368896, "logits/chosen": 1.0641433000564575, "logits/rejected": 1.0221810340881348, "logps/chosen": -2.426119089126587, "logps/rejected": -3.930655002593994, "loss": 0.6518, "nll_loss": 0.6117850542068481, "rewards/accuracies": 0.75, "rewards/chosen": -0.24261189997196198, "rewards/margins": 0.150453582406044, "rewards/rejected": -0.39306551218032837, "step": 3702 }, { "epoch": 10.13826146475017, "grad_norm": 5.158544063568115, "learning_rate": 4.927397260273972e-07, "log_odds_chosen": 3.6485705375671387, "log_odds_ratio": -0.19845640659332275, "logits/chosen": 0.819037675857544, "logits/rejected": 0.7823628187179565, "logps/chosen": -1.8010313510894775, "logps/rejected": -5.3179826736450195, "loss": 0.6437, "nll_loss": 0.6238994598388672, "rewards/accuracies": 1.0, "rewards/chosen": -0.1801031529903412, "rewards/margins": 0.35169512033462524, "rewards/rejected": -0.531798243522644, "step": 3703 }, { "epoch": 10.140999315537304, "grad_norm": 3.575913906097412, "learning_rate": 4.926027397260273e-07, "log_odds_chosen": 1.349977731704712, "log_odds_ratio": -0.3490920662879944, "logits/chosen": 0.8782913684844971, "logits/rejected": 0.811747670173645, "logps/chosen": -1.52681303024292, "logps/rejected": -2.669759750366211, "loss": 0.5325, "nll_loss": 0.4975495934486389, "rewards/accuracies": 1.0, "rewards/chosen": -0.15268130600452423, "rewards/margins": 0.11429467052221298, "rewards/rejected": -0.2669759690761566, "step": 3704 }, { "epoch": 10.143737166324435, "grad_norm": 4.326502323150635, "learning_rate": 4.924657534246575e-07, "log_odds_chosen": 2.9068117141723633, "log_odds_ratio": -0.17328250408172607, "logits/chosen": 1.1722533702850342, "logits/rejected": 1.1835262775421143, "logps/chosen": -2.9852488040924072, "logps/rejected": -5.743491172790527, "loss": 0.6052, "nll_loss": 0.587916910648346, "rewards/accuracies": 1.0, "rewards/chosen": -0.2985248565673828, "rewards/margins": 0.27582424879074097, "rewards/rejected": -0.5743491649627686, "step": 3705 }, { "epoch": 10.146475017111568, "grad_norm": 3.167604684829712, "learning_rate": 4.923287671232876e-07, "log_odds_chosen": 3.164175033569336, "log_odds_ratio": -0.22319097816944122, "logits/chosen": 0.6644378900527954, "logits/rejected": 0.6772673726081848, "logps/chosen": -1.327473759651184, "logps/rejected": -4.249022006988525, "loss": 0.6551, "nll_loss": 0.6328221559524536, "rewards/accuracies": 1.0, "rewards/chosen": -0.13274738192558289, "rewards/margins": 0.29215478897094727, "rewards/rejected": -0.42490217089653015, "step": 3706 }, { "epoch": 10.149212867898699, "grad_norm": 3.350335121154785, "learning_rate": 4.921917808219179e-07, "log_odds_chosen": 1.5695710182189941, "log_odds_ratio": -0.27663731575012207, "logits/chosen": 0.9076581001281738, "logits/rejected": 0.8278435468673706, "logps/chosen": -2.123042583465576, "logps/rejected": -3.5558724403381348, "loss": 0.5859, "nll_loss": 0.5582706928253174, "rewards/accuracies": 1.0, "rewards/chosen": -0.2123042643070221, "rewards/margins": 0.14328300952911377, "rewards/rejected": -0.3555872440338135, "step": 3707 }, { "epoch": 10.151950718685832, "grad_norm": 3.048382520675659, "learning_rate": 4.92054794520548e-07, "log_odds_chosen": 2.8507447242736816, "log_odds_ratio": -0.12450796365737915, "logits/chosen": 1.1095831394195557, "logits/rejected": 1.1557869911193848, "logps/chosen": -2.139317512512207, "logps/rejected": -4.799023628234863, "loss": 0.593, "nll_loss": 0.5805326700210571, "rewards/accuracies": 1.0, "rewards/chosen": -0.21393173933029175, "rewards/margins": 0.2659706175327301, "rewards/rejected": -0.47990235686302185, "step": 3708 }, { "epoch": 10.154688569472963, "grad_norm": 3.2423095703125, "learning_rate": 4.91917808219178e-07, "log_odds_chosen": 3.135014295578003, "log_odds_ratio": -0.12143466621637344, "logits/chosen": 1.1968872547149658, "logits/rejected": 1.1970595121383667, "logps/chosen": -1.4494162797927856, "logps/rejected": -4.168849945068359, "loss": 0.5086, "nll_loss": 0.49642321467399597, "rewards/accuracies": 1.0, "rewards/chosen": -0.14494162797927856, "rewards/margins": 0.2719433903694153, "rewards/rejected": -0.41688498854637146, "step": 3709 }, { "epoch": 10.157426420260096, "grad_norm": 4.1612324714660645, "learning_rate": 4.917808219178081e-07, "log_odds_chosen": 2.0354835987091064, "log_odds_ratio": -0.20898541808128357, "logits/chosen": 0.7574429512023926, "logits/rejected": 0.7739842534065247, "logps/chosen": -2.661271572113037, "logps/rejected": -4.56433629989624, "loss": 0.6435, "nll_loss": 0.6225738525390625, "rewards/accuracies": 1.0, "rewards/chosen": -0.2661271393299103, "rewards/margins": 0.19030649960041046, "rewards/rejected": -0.45643365383148193, "step": 3710 }, { "epoch": 10.160164271047227, "grad_norm": 3.25101375579834, "learning_rate": 4.916438356164384e-07, "log_odds_chosen": 2.4048495292663574, "log_odds_ratio": -0.2179599106311798, "logits/chosen": 0.9383249878883362, "logits/rejected": 0.8352793455123901, "logps/chosen": -2.0811290740966797, "logps/rejected": -4.3202385902404785, "loss": 0.6197, "nll_loss": 0.5978910326957703, "rewards/accuracies": 1.0, "rewards/chosen": -0.208112895488739, "rewards/margins": 0.22391092777252197, "rewards/rejected": -0.432023823261261, "step": 3711 }, { "epoch": 10.16290212183436, "grad_norm": 3.9748687744140625, "learning_rate": 4.915068493150685e-07, "log_odds_chosen": 3.915825843811035, "log_odds_ratio": -0.15356142818927765, "logits/chosen": 0.909420371055603, "logits/rejected": 0.894509494304657, "logps/chosen": -1.7872164249420166, "logps/rejected": -5.5353193283081055, "loss": 0.5862, "nll_loss": 0.5708053112030029, "rewards/accuracies": 1.0, "rewards/chosen": -0.17872163653373718, "rewards/margins": 0.3748103082180023, "rewards/rejected": -0.5535320043563843, "step": 3712 }, { "epoch": 10.165639972621491, "grad_norm": 6.336028575897217, "learning_rate": 4.913698630136986e-07, "log_odds_chosen": 2.7753119468688965, "log_odds_ratio": -0.4091944098472595, "logits/chosen": 1.080878496170044, "logits/rejected": 1.0900675058364868, "logps/chosen": -2.2887964248657227, "logps/rejected": -4.956148624420166, "loss": 0.7861, "nll_loss": 0.7452139258384705, "rewards/accuracies": 0.625, "rewards/chosen": -0.2288796305656433, "rewards/margins": 0.2667351961135864, "rewards/rejected": -0.49561482667922974, "step": 3713 }, { "epoch": 10.168377823408624, "grad_norm": 7.4613728523254395, "learning_rate": 4.912328767123288e-07, "log_odds_chosen": 1.673044204711914, "log_odds_ratio": -0.4736878275871277, "logits/chosen": 0.846605658531189, "logits/rejected": 0.810286819934845, "logps/chosen": -1.967389702796936, "logps/rejected": -3.4934468269348145, "loss": 0.6508, "nll_loss": 0.6033889055252075, "rewards/accuracies": 0.75, "rewards/chosen": -0.19673895835876465, "rewards/margins": 0.15260574221611023, "rewards/rejected": -0.34934473037719727, "step": 3714 }, { "epoch": 10.171115674195756, "grad_norm": 3.1697371006011963, "learning_rate": 4.910958904109589e-07, "log_odds_chosen": 4.4645538330078125, "log_odds_ratio": -0.12776020169258118, "logits/chosen": 0.7706959247589111, "logits/rejected": 0.7455927133560181, "logps/chosen": -2.2291359901428223, "logps/rejected": -6.564742088317871, "loss": 0.628, "nll_loss": 0.6152068376541138, "rewards/accuracies": 1.0, "rewards/chosen": -0.22291362285614014, "rewards/margins": 0.4335605502128601, "rewards/rejected": -0.6564741730690002, "step": 3715 }, { "epoch": 10.173853524982889, "grad_norm": 3.5861167907714844, "learning_rate": 4.909589041095891e-07, "log_odds_chosen": 2.0052525997161865, "log_odds_ratio": -0.23273684084415436, "logits/chosen": 0.710036039352417, "logits/rejected": 0.6329967975616455, "logps/chosen": -1.2467312812805176, "logps/rejected": -3.00077486038208, "loss": 0.5243, "nll_loss": 0.5010104179382324, "rewards/accuracies": 1.0, "rewards/chosen": -0.12467312067747116, "rewards/margins": 0.17540434002876282, "rewards/rejected": -0.3000774681568146, "step": 3716 }, { "epoch": 10.17659137577002, "grad_norm": 3.246208667755127, "learning_rate": 4.908219178082192e-07, "log_odds_chosen": 3.7121291160583496, "log_odds_ratio": -0.2281980812549591, "logits/chosen": 0.8595240116119385, "logits/rejected": 0.7741104960441589, "logps/chosen": -1.6657114028930664, "logps/rejected": -5.218580722808838, "loss": 0.5978, "nll_loss": 0.5750269889831543, "rewards/accuracies": 0.875, "rewards/chosen": -0.16657114028930664, "rewards/margins": 0.35528695583343506, "rewards/rejected": -0.5218580961227417, "step": 3717 }, { "epoch": 10.179329226557153, "grad_norm": 3.3535573482513428, "learning_rate": 4.906849315068493e-07, "log_odds_chosen": 2.1170971393585205, "log_odds_ratio": -0.27712979912757874, "logits/chosen": 0.8160920143127441, "logits/rejected": 0.8537349700927734, "logps/chosen": -1.6516025066375732, "logps/rejected": -3.446648120880127, "loss": 0.5307, "nll_loss": 0.5030226707458496, "rewards/accuracies": 0.75, "rewards/chosen": -0.16516023874282837, "rewards/margins": 0.17950454354286194, "rewards/rejected": -0.3446648120880127, "step": 3718 }, { "epoch": 10.182067077344286, "grad_norm": 3.327965021133423, "learning_rate": 4.905479452054795e-07, "log_odds_chosen": 1.8342628479003906, "log_odds_ratio": -0.19559776782989502, "logits/chosen": 1.0194339752197266, "logits/rejected": 1.0143733024597168, "logps/chosen": -1.7796934843063354, "logps/rejected": -3.439608573913574, "loss": 0.4825, "nll_loss": 0.4629245698451996, "rewards/accuracies": 1.0, "rewards/chosen": -0.17796936631202698, "rewards/margins": 0.16599152982234955, "rewards/rejected": -0.34396088123321533, "step": 3719 }, { "epoch": 10.184804928131417, "grad_norm": 4.112057209014893, "learning_rate": 4.904109589041096e-07, "log_odds_chosen": 3.149081230163574, "log_odds_ratio": -0.20683836936950684, "logits/chosen": 0.5414260029792786, "logits/rejected": 0.46074867248535156, "logps/chosen": -1.9438122510910034, "logps/rejected": -4.950610160827637, "loss": 0.6618, "nll_loss": 0.6411470174789429, "rewards/accuracies": 1.0, "rewards/chosen": -0.1943812370300293, "rewards/margins": 0.3006798326969147, "rewards/rejected": -0.4950610399246216, "step": 3720 }, { "epoch": 10.18754277891855, "grad_norm": 3.3277251720428467, "learning_rate": 4.902739726027398e-07, "log_odds_chosen": 2.2883365154266357, "log_odds_ratio": -0.18245594203472137, "logits/chosen": 0.9600581526756287, "logits/rejected": 0.9510051608085632, "logps/chosen": -2.0409746170043945, "logps/rejected": -4.183133602142334, "loss": 0.781, "nll_loss": 0.7627726197242737, "rewards/accuracies": 1.0, "rewards/chosen": -0.20409747958183289, "rewards/margins": 0.21421590447425842, "rewards/rejected": -0.4183133542537689, "step": 3721 }, { "epoch": 10.190280629705681, "grad_norm": 5.32500696182251, "learning_rate": 4.901369863013699e-07, "log_odds_chosen": 3.300022602081299, "log_odds_ratio": -0.11786912381649017, "logits/chosen": 0.7917638421058655, "logits/rejected": 0.7317028045654297, "logps/chosen": -1.9609832763671875, "logps/rejected": -5.005830764770508, "loss": 0.6346, "nll_loss": 0.622802197933197, "rewards/accuracies": 1.0, "rewards/chosen": -0.19609832763671875, "rewards/margins": 0.3044847846031189, "rewards/rejected": -0.5005831718444824, "step": 3722 }, { "epoch": 10.193018480492814, "grad_norm": 4.8416428565979, "learning_rate": 4.9e-07, "log_odds_chosen": 2.2753260135650635, "log_odds_ratio": -0.4118178188800812, "logits/chosen": 0.7529302835464478, "logits/rejected": 0.7661324739456177, "logps/chosen": -2.5365302562713623, "logps/rejected": -4.69539737701416, "loss": 0.5697, "nll_loss": 0.5285641551017761, "rewards/accuracies": 0.75, "rewards/chosen": -0.25365304946899414, "rewards/margins": 0.2158866971731186, "rewards/rejected": -0.46953973174095154, "step": 3723 }, { "epoch": 10.195756331279945, "grad_norm": 5.455036640167236, "learning_rate": 4.898630136986301e-07, "log_odds_chosen": 2.897469997406006, "log_odds_ratio": -0.40664732456207275, "logits/chosen": 0.8440357446670532, "logits/rejected": 0.9278759956359863, "logps/chosen": -2.990645170211792, "logps/rejected": -5.73102331161499, "loss": 0.7801, "nll_loss": 0.7394438982009888, "rewards/accuracies": 0.875, "rewards/chosen": -0.2990645170211792, "rewards/margins": 0.27403780817985535, "rewards/rejected": -0.5731023550033569, "step": 3724 }, { "epoch": 10.198494182067078, "grad_norm": 3.0291123390197754, "learning_rate": 4.897260273972603e-07, "log_odds_chosen": 2.4508299827575684, "log_odds_ratio": -0.1598513275384903, "logits/chosen": 0.966677725315094, "logits/rejected": 1.0084737539291382, "logps/chosen": -1.5472559928894043, "logps/rejected": -3.786156177520752, "loss": 0.5695, "nll_loss": 0.5534883141517639, "rewards/accuracies": 1.0, "rewards/chosen": -0.1547255963087082, "rewards/margins": 0.223890021443367, "rewards/rejected": -0.3786156177520752, "step": 3725 }, { "epoch": 10.20123203285421, "grad_norm": 3.3329975605010986, "learning_rate": 4.895890410958904e-07, "log_odds_chosen": 3.111203670501709, "log_odds_ratio": -0.10785213112831116, "logits/chosen": 1.003218173980713, "logits/rejected": 0.9835805892944336, "logps/chosen": -1.9563490152359009, "logps/rejected": -4.863069534301758, "loss": 0.5662, "nll_loss": 0.5554088950157166, "rewards/accuracies": 1.0, "rewards/chosen": -0.1956349015235901, "rewards/margins": 0.29067206382751465, "rewards/rejected": -0.48630696535110474, "step": 3726 }, { "epoch": 10.203969883641342, "grad_norm": 5.5854411125183105, "learning_rate": 4.894520547945205e-07, "log_odds_chosen": 0.582766056060791, "log_odds_ratio": -0.7332755327224731, "logits/chosen": 0.9831843376159668, "logits/rejected": 1.012727975845337, "logps/chosen": -2.762230157852173, "logps/rejected": -3.2881979942321777, "loss": 0.6702, "nll_loss": 0.5969025492668152, "rewards/accuracies": 0.75, "rewards/chosen": -0.27622300386428833, "rewards/margins": 0.052596814930438995, "rewards/rejected": -0.32881981134414673, "step": 3727 }, { "epoch": 10.206707734428473, "grad_norm": 3.3801188468933105, "learning_rate": 4.893150684931507e-07, "log_odds_chosen": 2.3756933212280273, "log_odds_ratio": -0.2024160921573639, "logits/chosen": 0.792798638343811, "logits/rejected": 0.7119677066802979, "logps/chosen": -1.3718860149383545, "logps/rejected": -3.4623286724090576, "loss": 0.5553, "nll_loss": 0.5350581407546997, "rewards/accuracies": 1.0, "rewards/chosen": -0.1371886134147644, "rewards/margins": 0.2090442180633545, "rewards/rejected": -0.3462328314781189, "step": 3728 }, { "epoch": 10.209445585215606, "grad_norm": 3.640479326248169, "learning_rate": 4.891780821917808e-07, "log_odds_chosen": 3.4290075302124023, "log_odds_ratio": -0.1853659749031067, "logits/chosen": 0.7828930616378784, "logits/rejected": 0.8498413562774658, "logps/chosen": -1.826197862625122, "logps/rejected": -4.984941005706787, "loss": 0.5764, "nll_loss": 0.5578614473342896, "rewards/accuracies": 1.0, "rewards/chosen": -0.18261978030204773, "rewards/margins": 0.3158743679523468, "rewards/rejected": -0.49849411845207214, "step": 3729 }, { "epoch": 10.212183436002737, "grad_norm": 3.8676581382751465, "learning_rate": 4.89041095890411e-07, "log_odds_chosen": 1.5179436206817627, "log_odds_ratio": -0.43953877687454224, "logits/chosen": 1.0400718450546265, "logits/rejected": 1.013280987739563, "logps/chosen": -1.8969693183898926, "logps/rejected": -3.3251914978027344, "loss": 0.6648, "nll_loss": 0.6208536624908447, "rewards/accuracies": 0.875, "rewards/chosen": -0.18969692289829254, "rewards/margins": 0.1428222358226776, "rewards/rejected": -0.33251917362213135, "step": 3730 }, { "epoch": 10.21492128678987, "grad_norm": 3.869389295578003, "learning_rate": 4.889041095890411e-07, "log_odds_chosen": 1.9444527626037598, "log_odds_ratio": -0.29279038310050964, "logits/chosen": 0.9883759021759033, "logits/rejected": 0.9578933715820312, "logps/chosen": -1.5281398296356201, "logps/rejected": -3.2433154582977295, "loss": 0.5701, "nll_loss": 0.5408190488815308, "rewards/accuracies": 0.875, "rewards/chosen": -0.15281397104263306, "rewards/margins": 0.17151758074760437, "rewards/rejected": -0.3243315517902374, "step": 3731 }, { "epoch": 10.217659137577002, "grad_norm": 3.7225282192230225, "learning_rate": 4.887671232876712e-07, "log_odds_chosen": 3.6910927295684814, "log_odds_ratio": -0.18715272843837738, "logits/chosen": 1.0894286632537842, "logits/rejected": 1.0802973508834839, "logps/chosen": -1.890344500541687, "logps/rejected": -5.423979759216309, "loss": 0.6651, "nll_loss": 0.646344006061554, "rewards/accuracies": 0.875, "rewards/chosen": -0.18903446197509766, "rewards/margins": 0.3533634841442108, "rewards/rejected": -0.5423979759216309, "step": 3732 }, { "epoch": 10.220396988364135, "grad_norm": 4.267350196838379, "learning_rate": 4.886301369863014e-07, "log_odds_chosen": 2.1551809310913086, "log_odds_ratio": -0.4095434546470642, "logits/chosen": 1.2532033920288086, "logits/rejected": 1.2567684650421143, "logps/chosen": -2.0525779724121094, "logps/rejected": -4.04209566116333, "loss": 0.5512, "nll_loss": 0.5102232694625854, "rewards/accuracies": 0.875, "rewards/chosen": -0.20525778830051422, "rewards/margins": 0.19895179569721222, "rewards/rejected": -0.40420958399772644, "step": 3733 }, { "epoch": 10.223134839151266, "grad_norm": 6.753438949584961, "learning_rate": 4.884931506849315e-07, "log_odds_chosen": 0.8025225400924683, "log_odds_ratio": -0.9609387516975403, "logits/chosen": 0.8003795742988586, "logits/rejected": 0.7000995874404907, "logps/chosen": -2.473214864730835, "logps/rejected": -3.2532858848571777, "loss": 0.6475, "nll_loss": 0.5514501929283142, "rewards/accuracies": 0.75, "rewards/chosen": -0.2473215013742447, "rewards/margins": 0.07800710201263428, "rewards/rejected": -0.3253285884857178, "step": 3734 }, { "epoch": 10.225872689938399, "grad_norm": 5.506337642669678, "learning_rate": 4.883561643835617e-07, "log_odds_chosen": 1.5986089706420898, "log_odds_ratio": -0.5584664344787598, "logits/chosen": 1.1281569004058838, "logits/rejected": 1.1271296739578247, "logps/chosen": -1.9884337186813354, "logps/rejected": -3.419907331466675, "loss": 0.6046, "nll_loss": 0.5487034916877747, "rewards/accuracies": 0.75, "rewards/chosen": -0.19884337484836578, "rewards/margins": 0.14314737915992737, "rewards/rejected": -0.34199073910713196, "step": 3735 }, { "epoch": 10.22861054072553, "grad_norm": 3.0624594688415527, "learning_rate": 4.882191780821918e-07, "log_odds_chosen": 2.50347638130188, "log_odds_ratio": -0.183515265583992, "logits/chosen": 1.184412956237793, "logits/rejected": 1.182504415512085, "logps/chosen": -1.7623307704925537, "logps/rejected": -4.105000972747803, "loss": 0.5689, "nll_loss": 0.5505965352058411, "rewards/accuracies": 1.0, "rewards/chosen": -0.17623308300971985, "rewards/margins": 0.23426702618598938, "rewards/rejected": -0.41050004959106445, "step": 3736 }, { "epoch": 10.231348391512663, "grad_norm": 3.460373640060425, "learning_rate": 4.880821917808219e-07, "log_odds_chosen": 3.557573080062866, "log_odds_ratio": -0.12412229180335999, "logits/chosen": 0.7802374958992004, "logits/rejected": 0.6937856674194336, "logps/chosen": -1.3250160217285156, "logps/rejected": -4.574474334716797, "loss": 0.6316, "nll_loss": 0.6192246675491333, "rewards/accuracies": 1.0, "rewards/chosen": -0.13250160217285156, "rewards/margins": 0.3249458372592926, "rewards/rejected": -0.45744743943214417, "step": 3737 }, { "epoch": 10.234086242299794, "grad_norm": 3.4848220348358154, "learning_rate": 4.879452054794521e-07, "log_odds_chosen": 2.9409146308898926, "log_odds_ratio": -0.2028474062681198, "logits/chosen": 0.967795193195343, "logits/rejected": 0.9763882160186768, "logps/chosen": -2.2835280895233154, "logps/rejected": -5.003145694732666, "loss": 0.7271, "nll_loss": 0.7068557739257812, "rewards/accuracies": 1.0, "rewards/chosen": -0.22835281491279602, "rewards/margins": 0.2719617486000061, "rewards/rejected": -0.5003145933151245, "step": 3738 }, { "epoch": 10.236824093086927, "grad_norm": 3.409105062484741, "learning_rate": 4.878082191780822e-07, "log_odds_chosen": 2.6120779514312744, "log_odds_ratio": -0.22869758307933807, "logits/chosen": 0.6701573133468628, "logits/rejected": 0.5684036016464233, "logps/chosen": -2.2065253257751465, "logps/rejected": -4.71068811416626, "loss": 0.6391, "nll_loss": 0.6162109375, "rewards/accuracies": 1.0, "rewards/chosen": -0.2206525206565857, "rewards/margins": 0.2504163086414337, "rewards/rejected": -0.4710688591003418, "step": 3739 }, { "epoch": 10.239561943874058, "grad_norm": 3.680332899093628, "learning_rate": 4.876712328767123e-07, "log_odds_chosen": 1.0419819355010986, "log_odds_ratio": -0.454096257686615, "logits/chosen": 0.7484190464019775, "logits/rejected": 0.6654759645462036, "logps/chosen": -1.7947347164154053, "logps/rejected": -2.7455105781555176, "loss": 0.671, "nll_loss": 0.6255479454994202, "rewards/accuracies": 0.625, "rewards/chosen": -0.17947348952293396, "rewards/margins": 0.09507758915424347, "rewards/rejected": -0.2745510935783386, "step": 3740 }, { "epoch": 10.242299794661191, "grad_norm": 4.706099987030029, "learning_rate": 4.875342465753424e-07, "log_odds_chosen": 2.521510124206543, "log_odds_ratio": -0.1891656219959259, "logits/chosen": 0.8567432165145874, "logits/rejected": 0.8923261761665344, "logps/chosen": -1.8431235551834106, "logps/rejected": -4.199723720550537, "loss": 0.6241, "nll_loss": 0.6051785945892334, "rewards/accuracies": 0.875, "rewards/chosen": -0.1843123584985733, "rewards/margins": 0.23566001653671265, "rewards/rejected": -0.41997238993644714, "step": 3741 }, { "epoch": 10.245037645448322, "grad_norm": 5.499622344970703, "learning_rate": 4.873972602739726e-07, "log_odds_chosen": 1.4091967344284058, "log_odds_ratio": -0.4628906548023224, "logits/chosen": 1.053261637687683, "logits/rejected": 1.0656261444091797, "logps/chosen": -2.8147125244140625, "logps/rejected": -4.1331329345703125, "loss": 0.6276, "nll_loss": 0.581350564956665, "rewards/accuracies": 0.75, "rewards/chosen": -0.28147125244140625, "rewards/margins": 0.13184207677841187, "rewards/rejected": -0.4133133292198181, "step": 3742 }, { "epoch": 10.247775496235455, "grad_norm": 5.7068867683410645, "learning_rate": 4.872602739726027e-07, "log_odds_chosen": 3.1346826553344727, "log_odds_ratio": -0.3014427721500397, "logits/chosen": 1.1473677158355713, "logits/rejected": 1.1511344909667969, "logps/chosen": -2.225754976272583, "logps/rejected": -5.1454620361328125, "loss": 0.5923, "nll_loss": 0.5621519088745117, "rewards/accuracies": 0.875, "rewards/chosen": -0.22257548570632935, "rewards/margins": 0.29197070002555847, "rewards/rejected": -0.5145461559295654, "step": 3743 }, { "epoch": 10.250513347022586, "grad_norm": 3.5316197872161865, "learning_rate": 4.871232876712328e-07, "log_odds_chosen": 3.8627912998199463, "log_odds_ratio": -0.22919553518295288, "logits/chosen": 0.9489603042602539, "logits/rejected": 0.9640648365020752, "logps/chosen": -1.9709426164627075, "logps/rejected": -5.6235504150390625, "loss": 0.6695, "nll_loss": 0.6465593576431274, "rewards/accuracies": 0.875, "rewards/chosen": -0.19709426164627075, "rewards/margins": 0.3652608394622803, "rewards/rejected": -0.5623550415039062, "step": 3744 }, { "epoch": 10.25325119780972, "grad_norm": 3.5371057987213135, "learning_rate": 4.86986301369863e-07, "log_odds_chosen": 2.6196320056915283, "log_odds_ratio": -0.25282639265060425, "logits/chosen": 0.8858515024185181, "logits/rejected": 0.845489501953125, "logps/chosen": -1.9497370719909668, "logps/rejected": -4.458187103271484, "loss": 0.641, "nll_loss": 0.6157418489456177, "rewards/accuracies": 1.0, "rewards/chosen": -0.19497370719909668, "rewards/margins": 0.2508450150489807, "rewards/rejected": -0.4458187520503998, "step": 3745 }, { "epoch": 10.255989048596852, "grad_norm": 3.7292890548706055, "learning_rate": 4.868493150684931e-07, "log_odds_chosen": 3.6582508087158203, "log_odds_ratio": -0.31727105379104614, "logits/chosen": 1.2103476524353027, "logits/rejected": 1.181164026260376, "logps/chosen": -2.159437894821167, "logps/rejected": -5.731014728546143, "loss": 0.6308, "nll_loss": 0.599102258682251, "rewards/accuracies": 0.875, "rewards/chosen": -0.2159438133239746, "rewards/margins": 0.3571576774120331, "rewards/rejected": -0.5731014609336853, "step": 3746 }, { "epoch": 10.258726899383984, "grad_norm": 6.433704853057861, "learning_rate": 4.867123287671233e-07, "log_odds_chosen": 2.0765607357025146, "log_odds_ratio": -0.3880246579647064, "logits/chosen": 0.8331565260887146, "logits/rejected": 0.7847287654876709, "logps/chosen": -1.996416687965393, "logps/rejected": -3.88870906829834, "loss": 0.58, "nll_loss": 0.5412311553955078, "rewards/accuracies": 0.875, "rewards/chosen": -0.19964168965816498, "rewards/margins": 0.18922924995422363, "rewards/rejected": -0.3888709247112274, "step": 3747 }, { "epoch": 10.261464750171116, "grad_norm": 4.409358024597168, "learning_rate": 4.865753424657534e-07, "log_odds_chosen": 2.8037471771240234, "log_odds_ratio": -0.23258498311042786, "logits/chosen": 0.8345723152160645, "logits/rejected": 0.9218094348907471, "logps/chosen": -2.25819730758667, "logps/rejected": -4.7656660079956055, "loss": 0.6908, "nll_loss": 0.6675753593444824, "rewards/accuracies": 0.875, "rewards/chosen": -0.22581973671913147, "rewards/margins": 0.25074687600135803, "rewards/rejected": -0.4765666127204895, "step": 3748 }, { "epoch": 10.264202600958248, "grad_norm": 3.6784982681274414, "learning_rate": 4.864383561643836e-07, "log_odds_chosen": 1.5210504531860352, "log_odds_ratio": -0.3549133241176605, "logits/chosen": 1.3188092708587646, "logits/rejected": 1.3453022241592407, "logps/chosen": -2.261078119277954, "logps/rejected": -3.5883309841156006, "loss": 0.5969, "nll_loss": 0.5613895654678345, "rewards/accuracies": 0.875, "rewards/chosen": -0.22610782086849213, "rewards/margins": 0.1327252984046936, "rewards/rejected": -0.3588331341743469, "step": 3749 }, { "epoch": 10.26694045174538, "grad_norm": 3.596989154815674, "learning_rate": 4.863013698630137e-07, "log_odds_chosen": 3.3021209239959717, "log_odds_ratio": -0.13753396272659302, "logits/chosen": 1.1604381799697876, "logits/rejected": 1.1408997774124146, "logps/chosen": -1.4568912982940674, "logps/rejected": -4.530622482299805, "loss": 0.6296, "nll_loss": 0.6158568859100342, "rewards/accuracies": 1.0, "rewards/chosen": -0.14568912982940674, "rewards/margins": 0.30737313628196716, "rewards/rejected": -0.4530622661113739, "step": 3750 }, { "epoch": 10.269678302532512, "grad_norm": 4.308490753173828, "learning_rate": 4.861643835616438e-07, "log_odds_chosen": 2.230483055114746, "log_odds_ratio": -0.3231221139431, "logits/chosen": 1.1356960535049438, "logits/rejected": 1.1395124197006226, "logps/chosen": -2.172534942626953, "logps/rejected": -4.27325439453125, "loss": 0.585, "nll_loss": 0.5526431202888489, "rewards/accuracies": 0.875, "rewards/chosen": -0.21725350618362427, "rewards/margins": 0.21007192134857178, "rewards/rejected": -0.42732545733451843, "step": 3751 }, { "epoch": 10.272416153319645, "grad_norm": 4.270919322967529, "learning_rate": 4.86027397260274e-07, "log_odds_chosen": 1.1050816774368286, "log_odds_ratio": -0.4291401207447052, "logits/chosen": 1.0596179962158203, "logits/rejected": 1.0423616170883179, "logps/chosen": -1.9237605333328247, "logps/rejected": -2.8579907417297363, "loss": 0.6203, "nll_loss": 0.5773409008979797, "rewards/accuracies": 0.875, "rewards/chosen": -0.192376047372818, "rewards/margins": 0.09342304617166519, "rewards/rejected": -0.285799115896225, "step": 3752 }, { "epoch": 10.275154004106776, "grad_norm": 3.243462562561035, "learning_rate": 4.858904109589041e-07, "log_odds_chosen": 2.8292500972747803, "log_odds_ratio": -0.15222540497779846, "logits/chosen": 1.285278081893921, "logits/rejected": 1.3134795427322388, "logps/chosen": -2.0663156509399414, "logps/rejected": -4.759347438812256, "loss": 0.5824, "nll_loss": 0.5672095417976379, "rewards/accuracies": 1.0, "rewards/chosen": -0.20663155615329742, "rewards/margins": 0.2693031430244446, "rewards/rejected": -0.4759347438812256, "step": 3753 }, { "epoch": 10.277891854893909, "grad_norm": 4.75665807723999, "learning_rate": 4.857534246575342e-07, "log_odds_chosen": 1.8851057291030884, "log_odds_ratio": -0.3202074468135834, "logits/chosen": 0.9828230738639832, "logits/rejected": 1.0423150062561035, "logps/chosen": -2.8992397785186768, "logps/rejected": -4.683497428894043, "loss": 0.7234, "nll_loss": 0.6913775205612183, "rewards/accuracies": 0.875, "rewards/chosen": -0.2899239957332611, "rewards/margins": 0.17842575907707214, "rewards/rejected": -0.46834975481033325, "step": 3754 }, { "epoch": 10.28062970568104, "grad_norm": 3.3737633228302, "learning_rate": 4.856164383561643e-07, "log_odds_chosen": 2.385389804840088, "log_odds_ratio": -0.3142615556716919, "logits/chosen": 0.8098195195198059, "logits/rejected": 0.7698782682418823, "logps/chosen": -1.8038289546966553, "logps/rejected": -4.07604455947876, "loss": 0.574, "nll_loss": 0.5426220893859863, "rewards/accuracies": 0.875, "rewards/chosen": -0.1803828924894333, "rewards/margins": 0.22722157835960388, "rewards/rejected": -0.40760448575019836, "step": 3755 }, { "epoch": 10.283367556468173, "grad_norm": 4.485717296600342, "learning_rate": 4.854794520547945e-07, "log_odds_chosen": 1.1779630184173584, "log_odds_ratio": -0.3017737865447998, "logits/chosen": 0.9415667057037354, "logits/rejected": 0.8593565225601196, "logps/chosen": -1.6673561334609985, "logps/rejected": -2.6471381187438965, "loss": 0.5795, "nll_loss": 0.5493322610855103, "rewards/accuracies": 1.0, "rewards/chosen": -0.16673558950424194, "rewards/margins": 0.09797820448875427, "rewards/rejected": -0.2647138237953186, "step": 3756 }, { "epoch": 10.286105407255304, "grad_norm": 3.4864370822906494, "learning_rate": 4.853424657534246e-07, "log_odds_chosen": 1.280141830444336, "log_odds_ratio": -0.4044003486633301, "logits/chosen": 0.9863371849060059, "logits/rejected": 0.9847502708435059, "logps/chosen": -1.842581033706665, "logps/rejected": -3.060612201690674, "loss": 0.5795, "nll_loss": 0.5390950441360474, "rewards/accuracies": 0.75, "rewards/chosen": -0.1842581182718277, "rewards/margins": 0.12180311232805252, "rewards/rejected": -0.3060612380504608, "step": 3757 }, { "epoch": 10.288843258042437, "grad_norm": 3.278071403503418, "learning_rate": 4.852054794520547e-07, "log_odds_chosen": 3.676757335662842, "log_odds_ratio": -0.10209831595420837, "logits/chosen": 0.9429290294647217, "logits/rejected": 0.9892159700393677, "logps/chosen": -2.151919364929199, "logps/rejected": -5.649068832397461, "loss": 0.6006, "nll_loss": 0.5903766751289368, "rewards/accuracies": 1.0, "rewards/chosen": -0.21519194543361664, "rewards/margins": 0.3497149348258972, "rewards/rejected": -0.564906895160675, "step": 3758 }, { "epoch": 10.291581108829568, "grad_norm": 3.357600212097168, "learning_rate": 4.850684931506849e-07, "log_odds_chosen": 2.7843077182769775, "log_odds_ratio": -0.14110447466373444, "logits/chosen": 0.7260317206382751, "logits/rejected": 0.7208659648895264, "logps/chosen": -1.9404006004333496, "logps/rejected": -4.52376127243042, "loss": 0.5835, "nll_loss": 0.5694324970245361, "rewards/accuracies": 1.0, "rewards/chosen": -0.19404006004333496, "rewards/margins": 0.2583360970020294, "rewards/rejected": -0.452376127243042, "step": 3759 }, { "epoch": 10.294318959616701, "grad_norm": 6.278687953948975, "learning_rate": 4.84931506849315e-07, "log_odds_chosen": 2.6214170455932617, "log_odds_ratio": -0.21732556819915771, "logits/chosen": 0.8989380598068237, "logits/rejected": 0.8515142202377319, "logps/chosen": -1.909546136856079, "logps/rejected": -4.394552707672119, "loss": 0.6687, "nll_loss": 0.6470163464546204, "rewards/accuracies": 1.0, "rewards/chosen": -0.19095459580421448, "rewards/margins": 0.24850068986415863, "rewards/rejected": -0.4394552707672119, "step": 3760 }, { "epoch": 10.297056810403832, "grad_norm": 3.76050066947937, "learning_rate": 4.847945205479452e-07, "log_odds_chosen": 3.6972200870513916, "log_odds_ratio": -0.10779225081205368, "logits/chosen": 0.9201191663742065, "logits/rejected": 0.9429842233657837, "logps/chosen": -1.887527346611023, "logps/rejected": -5.32624626159668, "loss": 0.5698, "nll_loss": 0.5589966773986816, "rewards/accuracies": 1.0, "rewards/chosen": -0.18875272572040558, "rewards/margins": 0.3438718616962433, "rewards/rejected": -0.5326246023178101, "step": 3761 }, { "epoch": 10.299794661190965, "grad_norm": 3.700577735900879, "learning_rate": 4.846575342465753e-07, "log_odds_chosen": 2.3616271018981934, "log_odds_ratio": -0.211654394865036, "logits/chosen": 0.8413851261138916, "logits/rejected": 0.8388633728027344, "logps/chosen": -2.426015853881836, "logps/rejected": -4.65281867980957, "loss": 0.6023, "nll_loss": 0.5811318159103394, "rewards/accuracies": 1.0, "rewards/chosen": -0.24260158836841583, "rewards/margins": 0.22268031537532806, "rewards/rejected": -0.4652819037437439, "step": 3762 }, { "epoch": 10.302532511978097, "grad_norm": 3.496993064880371, "learning_rate": 4.845205479452055e-07, "log_odds_chosen": 1.8066807985305786, "log_odds_ratio": -0.2978777289390564, "logits/chosen": 0.8598923683166504, "logits/rejected": 0.8175729513168335, "logps/chosen": -1.9890414476394653, "logps/rejected": -3.669506788253784, "loss": 0.6384, "nll_loss": 0.6085726022720337, "rewards/accuracies": 0.875, "rewards/chosen": -0.1989041566848755, "rewards/margins": 0.16804653406143188, "rewards/rejected": -0.3669506907463074, "step": 3763 }, { "epoch": 10.30527036276523, "grad_norm": 3.3469669818878174, "learning_rate": 4.843835616438356e-07, "log_odds_chosen": 2.940807580947876, "log_odds_ratio": -0.20733140408992767, "logits/chosen": 1.0219035148620605, "logits/rejected": 0.9902539253234863, "logps/chosen": -1.982375144958496, "logps/rejected": -4.783001899719238, "loss": 0.654, "nll_loss": 0.633304238319397, "rewards/accuracies": 0.875, "rewards/chosen": -0.19823752343654633, "rewards/margins": 0.2800626754760742, "rewards/rejected": -0.47830018401145935, "step": 3764 }, { "epoch": 10.30800821355236, "grad_norm": 3.2335071563720703, "learning_rate": 4.842465753424657e-07, "log_odds_chosen": 2.8026509284973145, "log_odds_ratio": -0.19404715299606323, "logits/chosen": 1.0192253589630127, "logits/rejected": 1.0939357280731201, "logps/chosen": -2.4481801986694336, "logps/rejected": -5.1685075759887695, "loss": 0.744, "nll_loss": 0.7246324419975281, "rewards/accuracies": 1.0, "rewards/chosen": -0.24481801688671112, "rewards/margins": 0.2720327377319336, "rewards/rejected": -0.5168507695198059, "step": 3765 }, { "epoch": 10.310746064339494, "grad_norm": 3.2533934116363525, "learning_rate": 4.841095890410959e-07, "log_odds_chosen": 3.8411970138549805, "log_odds_ratio": -0.2932771146297455, "logits/chosen": 0.6584181785583496, "logits/rejected": 0.6799430251121521, "logps/chosen": -2.144073009490967, "logps/rejected": -5.883522987365723, "loss": 0.6373, "nll_loss": 0.6079999208450317, "rewards/accuracies": 0.875, "rewards/chosen": -0.2144073098897934, "rewards/margins": 0.3739449679851532, "rewards/rejected": -0.5883523225784302, "step": 3766 }, { "epoch": 10.313483915126625, "grad_norm": 7.638189792633057, "learning_rate": 4.83972602739726e-07, "log_odds_chosen": 0.9507028460502625, "log_odds_ratio": -0.4692457914352417, "logits/chosen": 0.9357732534408569, "logits/rejected": 0.9300230741500854, "logps/chosen": -2.2723388671875, "logps/rejected": -3.140676736831665, "loss": 0.5721, "nll_loss": 0.5252025127410889, "rewards/accuracies": 0.875, "rewards/chosen": -0.22723388671875, "rewards/margins": 0.08683376759290695, "rewards/rejected": -0.31406766176223755, "step": 3767 }, { "epoch": 10.316221765913758, "grad_norm": 2.832688808441162, "learning_rate": 4.838356164383561e-07, "log_odds_chosen": 2.897674798965454, "log_odds_ratio": -0.22377245128154755, "logits/chosen": 1.2586369514465332, "logits/rejected": 1.2891654968261719, "logps/chosen": -1.8037972450256348, "logps/rejected": -4.516123294830322, "loss": 0.5531, "nll_loss": 0.5307202935218811, "rewards/accuracies": 0.875, "rewards/chosen": -0.1803797334432602, "rewards/margins": 0.271232545375824, "rewards/rejected": -0.45161229372024536, "step": 3768 }, { "epoch": 10.318959616700889, "grad_norm": 4.008209228515625, "learning_rate": 4.836986301369862e-07, "log_odds_chosen": 3.461033344268799, "log_odds_ratio": -0.12678612768650055, "logits/chosen": 1.1683387756347656, "logits/rejected": 1.1849311590194702, "logps/chosen": -2.047592878341675, "logps/rejected": -5.283586502075195, "loss": 0.6406, "nll_loss": 0.6279195547103882, "rewards/accuracies": 1.0, "rewards/chosen": -0.20475928485393524, "rewards/margins": 0.32359936833381653, "rewards/rejected": -0.5283586382865906, "step": 3769 }, { "epoch": 10.321697467488022, "grad_norm": 3.6715359687805176, "learning_rate": 4.835616438356164e-07, "log_odds_chosen": 2.7844717502593994, "log_odds_ratio": -0.19437454640865326, "logits/chosen": 0.9725143909454346, "logits/rejected": 0.9895090460777283, "logps/chosen": -1.8821978569030762, "logps/rejected": -4.532232284545898, "loss": 0.5785, "nll_loss": 0.559043824672699, "rewards/accuracies": 1.0, "rewards/chosen": -0.18821978569030762, "rewards/margins": 0.2650034427642822, "rewards/rejected": -0.45322322845458984, "step": 3770 }, { "epoch": 10.324435318275153, "grad_norm": 3.774888515472412, "learning_rate": 4.834246575342465e-07, "log_odds_chosen": 1.134002447128296, "log_odds_ratio": -0.605383574962616, "logits/chosen": 0.8838460445404053, "logits/rejected": 0.9037327766418457, "logps/chosen": -2.2121267318725586, "logps/rejected": -3.257355213165283, "loss": 0.5626, "nll_loss": 0.5020300149917603, "rewards/accuracies": 0.75, "rewards/chosen": -0.22121268510818481, "rewards/margins": 0.10452285408973694, "rewards/rejected": -0.32573553919792175, "step": 3771 }, { "epoch": 10.327173169062286, "grad_norm": 3.6216349601745605, "learning_rate": 4.832876712328766e-07, "log_odds_chosen": 1.305717945098877, "log_odds_ratio": -0.3211970627307892, "logits/chosen": 0.8845524787902832, "logits/rejected": 0.7595922946929932, "logps/chosen": -2.195279359817505, "logps/rejected": -3.4227259159088135, "loss": 0.6832, "nll_loss": 0.6511008143424988, "rewards/accuracies": 0.875, "rewards/chosen": -0.219527930021286, "rewards/margins": 0.12274465709924698, "rewards/rejected": -0.3422726094722748, "step": 3772 }, { "epoch": 10.329911019849419, "grad_norm": 3.393771171569824, "learning_rate": 4.831506849315068e-07, "log_odds_chosen": 4.167352199554443, "log_odds_ratio": -0.06234760582447052, "logits/chosen": 0.9825732707977295, "logits/rejected": 0.982161819934845, "logps/chosen": -1.8449324369430542, "logps/rejected": -5.83522891998291, "loss": 0.5344, "nll_loss": 0.5281693935394287, "rewards/accuracies": 1.0, "rewards/chosen": -0.18449324369430542, "rewards/margins": 0.3990296423435211, "rewards/rejected": -0.5835229158401489, "step": 3773 }, { "epoch": 10.33264887063655, "grad_norm": 4.684404373168945, "learning_rate": 4.830136986301369e-07, "log_odds_chosen": 1.9782915115356445, "log_odds_ratio": -0.3272027373313904, "logits/chosen": 1.04218590259552, "logits/rejected": 0.9746188521385193, "logps/chosen": -2.172666549682617, "logps/rejected": -4.003485679626465, "loss": 0.5812, "nll_loss": 0.5484933853149414, "rewards/accuracies": 0.875, "rewards/chosen": -0.21726666390895844, "rewards/margins": 0.18308189511299133, "rewards/rejected": -0.40034857392311096, "step": 3774 }, { "epoch": 10.335386721423683, "grad_norm": 6.8187360763549805, "learning_rate": 4.828767123287671e-07, "log_odds_chosen": 0.31185758113861084, "log_odds_ratio": -0.7060668468475342, "logits/chosen": 0.8778616189956665, "logits/rejected": 0.9120387434959412, "logps/chosen": -2.638230085372925, "logps/rejected": -2.903980255126953, "loss": 0.6314, "nll_loss": 0.56074458360672, "rewards/accuracies": 0.625, "rewards/chosen": -0.2638230323791504, "rewards/margins": 0.02657502330839634, "rewards/rejected": -0.2903980612754822, "step": 3775 }, { "epoch": 10.338124572210814, "grad_norm": 3.6082916259765625, "learning_rate": 4.827397260273972e-07, "log_odds_chosen": 2.7977185249328613, "log_odds_ratio": -0.14050832390785217, "logits/chosen": 0.7600639462471008, "logits/rejected": 0.7510321140289307, "logps/chosen": -1.2188777923583984, "logps/rejected": -3.6541850566864014, "loss": 0.5261, "nll_loss": 0.5120134353637695, "rewards/accuracies": 1.0, "rewards/chosen": -0.12188778817653656, "rewards/margins": 0.24353070557117462, "rewards/rejected": -0.3654184937477112, "step": 3776 }, { "epoch": 10.340862422997947, "grad_norm": 3.4878642559051514, "learning_rate": 4.826027397260274e-07, "log_odds_chosen": 2.7777180671691895, "log_odds_ratio": -0.12767328321933746, "logits/chosen": 1.4113199710845947, "logits/rejected": 1.45624577999115, "logps/chosen": -2.221484899520874, "logps/rejected": -4.839816570281982, "loss": 0.5622, "nll_loss": 0.5494660139083862, "rewards/accuracies": 1.0, "rewards/chosen": -0.22214847803115845, "rewards/margins": 0.26183319091796875, "rewards/rejected": -0.4839816689491272, "step": 3777 }, { "epoch": 10.343600273785079, "grad_norm": 2.782052993774414, "learning_rate": 4.824657534246575e-07, "log_odds_chosen": 3.9566009044647217, "log_odds_ratio": -0.08648452907800674, "logits/chosen": 0.9842771291732788, "logits/rejected": 0.9800326824188232, "logps/chosen": -2.368212938308716, "logps/rejected": -6.182191371917725, "loss": 0.6005, "nll_loss": 0.5918471217155457, "rewards/accuracies": 1.0, "rewards/chosen": -0.23682132363319397, "rewards/margins": 0.3813978433609009, "rewards/rejected": -0.6182191371917725, "step": 3778 }, { "epoch": 10.346338124572211, "grad_norm": 4.304703235626221, "learning_rate": 4.823287671232876e-07, "log_odds_chosen": 2.725040912628174, "log_odds_ratio": -0.2093181610107422, "logits/chosen": 1.1896907091140747, "logits/rejected": 1.2392388582229614, "logps/chosen": -2.114938974380493, "logps/rejected": -4.57227087020874, "loss": 0.5609, "nll_loss": 0.5399686694145203, "rewards/accuracies": 1.0, "rewards/chosen": -0.21149387955665588, "rewards/margins": 0.24573321640491486, "rewards/rejected": -0.45722711086273193, "step": 3779 }, { "epoch": 10.349075975359343, "grad_norm": 3.6128010749816895, "learning_rate": 4.821917808219178e-07, "log_odds_chosen": 3.198118209838867, "log_odds_ratio": -0.11890057474374771, "logits/chosen": 0.6664390563964844, "logits/rejected": 0.677550196647644, "logps/chosen": -1.445199966430664, "logps/rejected": -4.210931777954102, "loss": 0.6971, "nll_loss": 0.6851823329925537, "rewards/accuracies": 1.0, "rewards/chosen": -0.14451999962329865, "rewards/margins": 0.27657318115234375, "rewards/rejected": -0.4210931658744812, "step": 3780 }, { "epoch": 10.351813826146476, "grad_norm": 5.090275287628174, "learning_rate": 4.820547945205479e-07, "log_odds_chosen": 1.2710413932800293, "log_odds_ratio": -0.6822346448898315, "logits/chosen": 0.996065616607666, "logits/rejected": 1.0190808773040771, "logps/chosen": -2.2387642860412598, "logps/rejected": -3.3582370281219482, "loss": 0.5687, "nll_loss": 0.5004491806030273, "rewards/accuracies": 0.75, "rewards/chosen": -0.22387641668319702, "rewards/margins": 0.11194729804992676, "rewards/rejected": -0.3358237147331238, "step": 3781 }, { "epoch": 10.354551676933607, "grad_norm": 3.335805892944336, "learning_rate": 4.81917808219178e-07, "log_odds_chosen": 4.287425518035889, "log_odds_ratio": -0.06824269890785217, "logits/chosen": 1.1112290620803833, "logits/rejected": 1.0914552211761475, "logps/chosen": -1.4240745306015015, "logps/rejected": -5.403040409088135, "loss": 0.5681, "nll_loss": 0.5612666010856628, "rewards/accuracies": 1.0, "rewards/chosen": -0.14240746200084686, "rewards/margins": 0.39789658784866333, "rewards/rejected": -0.5403040647506714, "step": 3782 }, { "epoch": 10.35728952772074, "grad_norm": 3.327393054962158, "learning_rate": 4.817808219178082e-07, "log_odds_chosen": 2.233454942703247, "log_odds_ratio": -0.3130860924720764, "logits/chosen": 0.9224952459335327, "logits/rejected": 0.8740917444229126, "logps/chosen": -1.3796029090881348, "logps/rejected": -3.418147087097168, "loss": 0.6239, "nll_loss": 0.592595100402832, "rewards/accuracies": 1.0, "rewards/chosen": -0.1379602700471878, "rewards/margins": 0.20385444164276123, "rewards/rejected": -0.34181472659111023, "step": 3783 }, { "epoch": 10.360027378507871, "grad_norm": 3.637624740600586, "learning_rate": 4.816438356164383e-07, "log_odds_chosen": 1.382384181022644, "log_odds_ratio": -0.3152286112308502, "logits/chosen": 1.0723741054534912, "logits/rejected": 1.0554614067077637, "logps/chosen": -1.5711346864700317, "logps/rejected": -2.7172367572784424, "loss": 0.4817, "nll_loss": 0.4501645863056183, "rewards/accuracies": 0.875, "rewards/chosen": -0.15711349248886108, "rewards/margins": 0.1146102026104927, "rewards/rejected": -0.2717236876487732, "step": 3784 }, { "epoch": 10.362765229295004, "grad_norm": 3.3115105628967285, "learning_rate": 4.815068493150684e-07, "log_odds_chosen": 2.3980872631073, "log_odds_ratio": -0.26838141679763794, "logits/chosen": 0.7408208250999451, "logits/rejected": 0.7394441366195679, "logps/chosen": -1.8143864870071411, "logps/rejected": -4.060371398925781, "loss": 0.5717, "nll_loss": 0.5448397397994995, "rewards/accuracies": 1.0, "rewards/chosen": -0.1814386546611786, "rewards/margins": 0.22459852695465088, "rewards/rejected": -0.40603718161582947, "step": 3785 }, { "epoch": 10.365503080082135, "grad_norm": 3.3155903816223145, "learning_rate": 4.813698630136985e-07, "log_odds_chosen": 2.5151588916778564, "log_odds_ratio": -0.20864413678646088, "logits/chosen": 1.0160552263259888, "logits/rejected": 1.032008171081543, "logps/chosen": -2.2159693241119385, "logps/rejected": -4.604024887084961, "loss": 0.5853, "nll_loss": 0.5644778609275818, "rewards/accuracies": 1.0, "rewards/chosen": -0.22159694135189056, "rewards/margins": 0.23880557715892792, "rewards/rejected": -0.4604025185108185, "step": 3786 }, { "epoch": 10.368240930869268, "grad_norm": 6.095314025878906, "learning_rate": 4.812328767123287e-07, "log_odds_chosen": 1.8537249565124512, "log_odds_ratio": -0.5853513479232788, "logits/chosen": 0.8251651525497437, "logits/rejected": 0.8362975120544434, "logps/chosen": -2.4540576934814453, "logps/rejected": -4.144575119018555, "loss": 0.6687, "nll_loss": 0.6102060675621033, "rewards/accuracies": 0.875, "rewards/chosen": -0.24540576338768005, "rewards/margins": 0.16905178129673004, "rewards/rejected": -0.4144575297832489, "step": 3787 }, { "epoch": 10.3709787816564, "grad_norm": 3.3940393924713135, "learning_rate": 4.810958904109588e-07, "log_odds_chosen": 3.7543625831604004, "log_odds_ratio": -0.11682084947824478, "logits/chosen": 0.9149040579795837, "logits/rejected": 0.9273653030395508, "logps/chosen": -1.508026361465454, "logps/rejected": -5.012528419494629, "loss": 0.6035, "nll_loss": 0.5918099284172058, "rewards/accuracies": 1.0, "rewards/chosen": -0.1508026421070099, "rewards/margins": 0.35045021772384644, "rewards/rejected": -0.5012528300285339, "step": 3788 }, { "epoch": 10.373716632443532, "grad_norm": 3.60768985748291, "learning_rate": 4.80958904109589e-07, "log_odds_chosen": 1.1373484134674072, "log_odds_ratio": -0.29451504349708557, "logits/chosen": 0.7204200029373169, "logits/rejected": 0.6573619842529297, "logps/chosen": -1.513792872428894, "logps/rejected": -2.4803545475006104, "loss": 0.564, "nll_loss": 0.5345162153244019, "rewards/accuracies": 1.0, "rewards/chosen": -0.1513792723417282, "rewards/margins": 0.0966561809182167, "rewards/rejected": -0.2480354607105255, "step": 3789 }, { "epoch": 10.376454483230663, "grad_norm": 3.4760520458221436, "learning_rate": 4.808219178082192e-07, "log_odds_chosen": 2.091806411743164, "log_odds_ratio": -0.2659562826156616, "logits/chosen": 0.6733232140541077, "logits/rejected": 0.5964084267616272, "logps/chosen": -1.4941811561584473, "logps/rejected": -3.286208152770996, "loss": 0.6016, "nll_loss": 0.5750517249107361, "rewards/accuracies": 1.0, "rewards/chosen": -0.14941813051700592, "rewards/margins": 0.17920267581939697, "rewards/rejected": -0.3286207914352417, "step": 3790 }, { "epoch": 10.379192334017796, "grad_norm": 3.268773317337036, "learning_rate": 4.806849315068492e-07, "log_odds_chosen": 2.7393550872802734, "log_odds_ratio": -0.23719382286071777, "logits/chosen": 0.9891535639762878, "logits/rejected": 0.9615364074707031, "logps/chosen": -1.687121868133545, "logps/rejected": -4.28000545501709, "loss": 0.5232, "nll_loss": 0.4994724988937378, "rewards/accuracies": 1.0, "rewards/chosen": -0.16871219873428345, "rewards/margins": 0.25928834080696106, "rewards/rejected": -0.4280005097389221, "step": 3791 }, { "epoch": 10.381930184804927, "grad_norm": 4.103049278259277, "learning_rate": 4.805479452054795e-07, "log_odds_chosen": 0.8424545526504517, "log_odds_ratio": -0.5475043058395386, "logits/chosen": 0.7850892543792725, "logits/rejected": 0.7443513870239258, "logps/chosen": -1.9661107063293457, "logps/rejected": -2.731320858001709, "loss": 0.5942, "nll_loss": 0.5394540429115295, "rewards/accuracies": 0.75, "rewards/chosen": -0.19661107659339905, "rewards/margins": 0.07652101665735245, "rewards/rejected": -0.2731321156024933, "step": 3792 }, { "epoch": 10.38466803559206, "grad_norm": 4.914765357971191, "learning_rate": 4.804109589041096e-07, "log_odds_chosen": 3.6694540977478027, "log_odds_ratio": -0.2828243374824524, "logits/chosen": 0.9697169065475464, "logits/rejected": 0.9460691213607788, "logps/chosen": -2.055436134338379, "logps/rejected": -5.6017608642578125, "loss": 0.6467, "nll_loss": 0.6184242367744446, "rewards/accuracies": 0.875, "rewards/chosen": -0.2055436074733734, "rewards/margins": 0.35463252663612366, "rewards/rejected": -0.5601761341094971, "step": 3793 }, { "epoch": 10.387405886379192, "grad_norm": 3.426975727081299, "learning_rate": 4.802739726027398e-07, "log_odds_chosen": 2.4380855560302734, "log_odds_ratio": -0.14371225237846375, "logits/chosen": 0.816681444644928, "logits/rejected": 0.7375104427337646, "logps/chosen": -1.9131402969360352, "logps/rejected": -4.203369140625, "loss": 0.6267, "nll_loss": 0.6123473644256592, "rewards/accuracies": 1.0, "rewards/chosen": -0.19131402671337128, "rewards/margins": 0.22902287542819977, "rewards/rejected": -0.42033690214157104, "step": 3794 }, { "epoch": 10.390143737166325, "grad_norm": 3.675733804702759, "learning_rate": 4.801369863013699e-07, "log_odds_chosen": 2.402066707611084, "log_odds_ratio": -0.219315305352211, "logits/chosen": 0.9290754795074463, "logits/rejected": 0.9161455631256104, "logps/chosen": -1.7024176120758057, "logps/rejected": -3.9625015258789062, "loss": 0.5851, "nll_loss": 0.563183069229126, "rewards/accuracies": 1.0, "rewards/chosen": -0.17024177312850952, "rewards/margins": 0.22600838541984558, "rewards/rejected": -0.3962501585483551, "step": 3795 }, { "epoch": 10.392881587953456, "grad_norm": 2.9554800987243652, "learning_rate": 4.8e-07, "log_odds_chosen": 2.597019910812378, "log_odds_ratio": -0.27393656969070435, "logits/chosen": 1.1580030918121338, "logits/rejected": 1.185929775238037, "logps/chosen": -1.7087693214416504, "logps/rejected": -4.12581205368042, "loss": 0.6443, "nll_loss": 0.6169247031211853, "rewards/accuracies": 0.875, "rewards/chosen": -0.17087693512439728, "rewards/margins": 0.2417042851448059, "rewards/rejected": -0.412581205368042, "step": 3796 }, { "epoch": 10.395619438740589, "grad_norm": 3.2861645221710205, "learning_rate": 4.798630136986302e-07, "log_odds_chosen": 1.817320466041565, "log_odds_ratio": -0.23927812278270721, "logits/chosen": 0.9542148113250732, "logits/rejected": 0.8675960898399353, "logps/chosen": -1.4787113666534424, "logps/rejected": -3.0627784729003906, "loss": 0.5231, "nll_loss": 0.4992211163043976, "rewards/accuracies": 1.0, "rewards/chosen": -0.14787113666534424, "rewards/margins": 0.15840670466423035, "rewards/rejected": -0.3062778413295746, "step": 3797 }, { "epoch": 10.39835728952772, "grad_norm": 3.612962484359741, "learning_rate": 4.797260273972603e-07, "log_odds_chosen": 4.403049945831299, "log_odds_ratio": -0.09203209728002548, "logits/chosen": 1.027217984199524, "logits/rejected": 1.0557913780212402, "logps/chosen": -2.2798402309417725, "logps/rejected": -6.531134128570557, "loss": 0.6924, "nll_loss": 0.6831512451171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.22798402607440948, "rewards/margins": 0.42512941360473633, "rewards/rejected": -0.6531134247779846, "step": 3798 }, { "epoch": 10.401095140314853, "grad_norm": 3.376300096511841, "learning_rate": 4.795890410958904e-07, "log_odds_chosen": 1.7851272821426392, "log_odds_ratio": -0.21370568871498108, "logits/chosen": 0.7822850346565247, "logits/rejected": 0.7517910599708557, "logps/chosen": -1.4856507778167725, "logps/rejected": -2.9819958209991455, "loss": 0.5738, "nll_loss": 0.5524680018424988, "rewards/accuracies": 1.0, "rewards/chosen": -0.14856508374214172, "rewards/margins": 0.14963451027870178, "rewards/rejected": -0.2981995940208435, "step": 3799 }, { "epoch": 10.403832991101986, "grad_norm": 3.7933666706085205, "learning_rate": 4.794520547945205e-07, "log_odds_chosen": 1.331534504890442, "log_odds_ratio": -0.3564516007900238, "logits/chosen": 0.8580469489097595, "logits/rejected": 0.778504490852356, "logps/chosen": -1.2313029766082764, "logps/rejected": -2.2557709217071533, "loss": 0.5458, "nll_loss": 0.5101615190505981, "rewards/accuracies": 0.875, "rewards/chosen": -0.12313029170036316, "rewards/margins": 0.10244680196046829, "rewards/rejected": -0.22557708621025085, "step": 3800 }, { "epoch": 10.406570841889117, "grad_norm": 3.5312411785125732, "learning_rate": 4.793150684931507e-07, "log_odds_chosen": 2.156637191772461, "log_odds_ratio": -0.2771969437599182, "logits/chosen": 0.8455239534378052, "logits/rejected": 0.8351293802261353, "logps/chosen": -1.635634422302246, "logps/rejected": -3.639932870864868, "loss": 0.57, "nll_loss": 0.5422414541244507, "rewards/accuracies": 1.0, "rewards/chosen": -0.16356344521045685, "rewards/margins": 0.20042982697486877, "rewards/rejected": -0.3639932870864868, "step": 3801 }, { "epoch": 10.40930869267625, "grad_norm": 3.092153310775757, "learning_rate": 4.791780821917808e-07, "log_odds_chosen": 3.5381760597229004, "log_odds_ratio": -0.08705505728721619, "logits/chosen": 0.8977372646331787, "logits/rejected": 0.8830434083938599, "logps/chosen": -2.114452362060547, "logps/rejected": -5.513723373413086, "loss": 0.6863, "nll_loss": 0.6775946617126465, "rewards/accuracies": 1.0, "rewards/chosen": -0.21144524216651917, "rewards/margins": 0.3399270474910736, "rewards/rejected": -0.5513722896575928, "step": 3802 }, { "epoch": 10.412046543463381, "grad_norm": 3.301222801208496, "learning_rate": 4.79041095890411e-07, "log_odds_chosen": 1.6471278667449951, "log_odds_ratio": -0.30556589365005493, "logits/chosen": 0.8913767337799072, "logits/rejected": 0.9055683612823486, "logps/chosen": -1.487202525138855, "logps/rejected": -2.9730722904205322, "loss": 0.5402, "nll_loss": 0.5096753835678101, "rewards/accuracies": 0.875, "rewards/chosen": -0.14872026443481445, "rewards/margins": 0.14858697354793549, "rewards/rejected": -0.29730725288391113, "step": 3803 }, { "epoch": 10.414784394250514, "grad_norm": 2.9672064781188965, "learning_rate": 4.789041095890411e-07, "log_odds_chosen": 2.0881245136260986, "log_odds_ratio": -0.2162768840789795, "logits/chosen": 0.8172912001609802, "logits/rejected": 0.8108142614364624, "logps/chosen": -2.075721502304077, "logps/rejected": -3.9896695613861084, "loss": 0.628, "nll_loss": 0.6063686609268188, "rewards/accuracies": 0.875, "rewards/chosen": -0.20757216215133667, "rewards/margins": 0.19139480590820312, "rewards/rejected": -0.3989669680595398, "step": 3804 }, { "epoch": 10.417522245037645, "grad_norm": 3.8398680686950684, "learning_rate": 4.787671232876712e-07, "log_odds_chosen": 2.5001049041748047, "log_odds_ratio": -0.1761205643415451, "logits/chosen": 1.1381194591522217, "logits/rejected": 1.144709587097168, "logps/chosen": -2.221764087677002, "logps/rejected": -4.593679428100586, "loss": 0.6423, "nll_loss": 0.6247149109840393, "rewards/accuracies": 1.0, "rewards/chosen": -0.22217638790607452, "rewards/margins": 0.23719152808189392, "rewards/rejected": -0.45936793088912964, "step": 3805 }, { "epoch": 10.420260095824778, "grad_norm": 5.763917922973633, "learning_rate": 4.786301369863014e-07, "log_odds_chosen": 0.701419472694397, "log_odds_ratio": -0.5666367411613464, "logits/chosen": 0.9103931188583374, "logits/rejected": 0.8357173800468445, "logps/chosen": -1.8679956197738647, "logps/rejected": -2.48099946975708, "loss": 0.6547, "nll_loss": 0.598044753074646, "rewards/accuracies": 0.875, "rewards/chosen": -0.18679958581924438, "rewards/margins": 0.0613003671169281, "rewards/rejected": -0.2480999231338501, "step": 3806 }, { "epoch": 10.42299794661191, "grad_norm": 5.346734046936035, "learning_rate": 4.784931506849315e-07, "log_odds_chosen": 3.1277740001678467, "log_odds_ratio": -0.3306778073310852, "logits/chosen": 1.097200632095337, "logits/rejected": 1.0448979139328003, "logps/chosen": -2.0827367305755615, "logps/rejected": -5.041427135467529, "loss": 0.7178, "nll_loss": 0.6847231984138489, "rewards/accuracies": 0.875, "rewards/chosen": -0.20827364921569824, "rewards/margins": 0.2958690822124481, "rewards/rejected": -0.5041427612304688, "step": 3807 }, { "epoch": 10.425735797399042, "grad_norm": 3.568952798843384, "learning_rate": 4.783561643835617e-07, "log_odds_chosen": 2.499372720718384, "log_odds_ratio": -0.2377096265554428, "logits/chosen": 1.0039936304092407, "logits/rejected": 1.0191203355789185, "logps/chosen": -2.4790968894958496, "logps/rejected": -4.871317386627197, "loss": 0.66, "nll_loss": 0.6361831426620483, "rewards/accuracies": 0.875, "rewards/chosen": -0.24790969491004944, "rewards/margins": 0.23922204971313477, "rewards/rejected": -0.4871317446231842, "step": 3808 }, { "epoch": 10.428473648186174, "grad_norm": 4.038022041320801, "learning_rate": 4.782191780821918e-07, "log_odds_chosen": 2.916428565979004, "log_odds_ratio": -0.17381733655929565, "logits/chosen": 0.9594939351081848, "logits/rejected": 0.9650131464004517, "logps/chosen": -1.8683565855026245, "logps/rejected": -4.598945140838623, "loss": 0.5871, "nll_loss": 0.5697124004364014, "rewards/accuracies": 1.0, "rewards/chosen": -0.1868356615304947, "rewards/margins": 0.2730588912963867, "rewards/rejected": -0.4598945379257202, "step": 3809 }, { "epoch": 10.431211498973306, "grad_norm": 4.296515464782715, "learning_rate": 4.780821917808219e-07, "log_odds_chosen": 2.3394460678100586, "log_odds_ratio": -0.2736908793449402, "logits/chosen": 0.9514909982681274, "logits/rejected": 0.9346104860305786, "logps/chosen": -2.589792013168335, "logps/rejected": -4.8398027420043945, "loss": 0.6004, "nll_loss": 0.5730658769607544, "rewards/accuracies": 0.875, "rewards/chosen": -0.2589792013168335, "rewards/margins": 0.22500109672546387, "rewards/rejected": -0.48398029804229736, "step": 3810 }, { "epoch": 10.433949349760438, "grad_norm": 4.240173816680908, "learning_rate": 4.779452054794521e-07, "log_odds_chosen": 3.3636317253112793, "log_odds_ratio": -0.28031644225120544, "logits/chosen": 0.8055440783500671, "logits/rejected": 0.7448288202285767, "logps/chosen": -1.8112742900848389, "logps/rejected": -4.9405517578125, "loss": 0.6502, "nll_loss": 0.6222175359725952, "rewards/accuracies": 0.875, "rewards/chosen": -0.1811274290084839, "rewards/margins": 0.312927782535553, "rewards/rejected": -0.49405521154403687, "step": 3811 }, { "epoch": 10.43668720054757, "grad_norm": 7.8137617111206055, "learning_rate": 4.778082191780822e-07, "log_odds_chosen": 0.7714202404022217, "log_odds_ratio": -0.9508427381515503, "logits/chosen": 0.9604097604751587, "logits/rejected": 0.9420583844184875, "logps/chosen": -2.6118295192718506, "logps/rejected": -3.2817742824554443, "loss": 0.6982, "nll_loss": 0.603147029876709, "rewards/accuracies": 0.75, "rewards/chosen": -0.2611829340457916, "rewards/margins": 0.06699445843696594, "rewards/rejected": -0.32817742228507996, "step": 3812 }, { "epoch": 10.439425051334702, "grad_norm": 4.907442569732666, "learning_rate": 4.776712328767123e-07, "log_odds_chosen": 1.2889511585235596, "log_odds_ratio": -0.4946129024028778, "logits/chosen": 1.088131070137024, "logits/rejected": 1.0630464553833008, "logps/chosen": -2.2823033332824707, "logps/rejected": -3.501437187194824, "loss": 0.6037, "nll_loss": 0.5542490482330322, "rewards/accuracies": 0.75, "rewards/chosen": -0.2282303273677826, "rewards/margins": 0.12191341072320938, "rewards/rejected": -0.3501437306404114, "step": 3813 }, { "epoch": 10.442162902121835, "grad_norm": 3.2915806770324707, "learning_rate": 4.775342465753425e-07, "log_odds_chosen": 3.1123225688934326, "log_odds_ratio": -0.1700599044561386, "logits/chosen": 1.1405586004257202, "logits/rejected": 1.1168091297149658, "logps/chosen": -1.6494425535202026, "logps/rejected": -4.565771102905273, "loss": 0.5935, "nll_loss": 0.576488196849823, "rewards/accuracies": 0.875, "rewards/chosen": -0.16494426131248474, "rewards/margins": 0.29163283109664917, "rewards/rejected": -0.4565771222114563, "step": 3814 }, { "epoch": 10.444900752908966, "grad_norm": 3.90077805519104, "learning_rate": 4.773972602739726e-07, "log_odds_chosen": 1.7145456075668335, "log_odds_ratio": -0.34285563230514526, "logits/chosen": 0.9273805618286133, "logits/rejected": 0.8407122492790222, "logps/chosen": -1.8075071573257446, "logps/rejected": -3.4218053817749023, "loss": 0.5864, "nll_loss": 0.5521360635757446, "rewards/accuracies": 0.875, "rewards/chosen": -0.1807507425546646, "rewards/margins": 0.16142985224723816, "rewards/rejected": -0.3421805799007416, "step": 3815 }, { "epoch": 10.447638603696099, "grad_norm": 3.3738763332366943, "learning_rate": 4.772602739726027e-07, "log_odds_chosen": 2.5602030754089355, "log_odds_ratio": -0.11849325895309448, "logits/chosen": 1.1794893741607666, "logits/rejected": 1.207571029663086, "logps/chosen": -2.0923361778259277, "logps/rejected": -4.493677139282227, "loss": 0.5093, "nll_loss": 0.49749845266342163, "rewards/accuracies": 1.0, "rewards/chosen": -0.2092336118221283, "rewards/margins": 0.24013406038284302, "rewards/rejected": -0.4493677020072937, "step": 3816 }, { "epoch": 10.45037645448323, "grad_norm": 3.8040289878845215, "learning_rate": 4.771232876712328e-07, "log_odds_chosen": 1.6452313661575317, "log_odds_ratio": -0.333486944437027, "logits/chosen": 1.012759804725647, "logits/rejected": 0.9786933064460754, "logps/chosen": -1.7470065355300903, "logps/rejected": -3.2814488410949707, "loss": 0.5792, "nll_loss": 0.5458993911743164, "rewards/accuracies": 0.875, "rewards/chosen": -0.17470064759254456, "rewards/margins": 0.15344423055648804, "rewards/rejected": -0.328144907951355, "step": 3817 }, { "epoch": 10.453114305270363, "grad_norm": 3.2967653274536133, "learning_rate": 4.76986301369863e-07, "log_odds_chosen": 2.6993772983551025, "log_odds_ratio": -0.1959582269191742, "logits/chosen": 0.9261775016784668, "logits/rejected": 0.9453545808792114, "logps/chosen": -2.2432055473327637, "logps/rejected": -4.809710502624512, "loss": 0.6392, "nll_loss": 0.6196430325508118, "rewards/accuracies": 0.875, "rewards/chosen": -0.22432056069374084, "rewards/margins": 0.25665050745010376, "rewards/rejected": -0.4809710681438446, "step": 3818 }, { "epoch": 10.455852156057494, "grad_norm": 3.9097201824188232, "learning_rate": 4.768493150684931e-07, "log_odds_chosen": 2.279904365539551, "log_odds_ratio": -0.23009809851646423, "logits/chosen": 0.7215701341629028, "logits/rejected": 0.6677125692367554, "logps/chosen": -1.9852962493896484, "logps/rejected": -4.121498107910156, "loss": 0.7119, "nll_loss": 0.6888575553894043, "rewards/accuracies": 0.875, "rewards/chosen": -0.19852963089942932, "rewards/margins": 0.2136201560497284, "rewards/rejected": -0.4121497869491577, "step": 3819 }, { "epoch": 10.458590006844627, "grad_norm": 3.8020577430725098, "learning_rate": 4.7671232876712324e-07, "log_odds_chosen": 1.7994239330291748, "log_odds_ratio": -0.25665420293807983, "logits/chosen": 1.2317728996276855, "logits/rejected": 1.2748469114303589, "logps/chosen": -2.0322537422180176, "logps/rejected": -3.6860337257385254, "loss": 0.5245, "nll_loss": 0.49882638454437256, "rewards/accuracies": 1.0, "rewards/chosen": -0.20322538912296295, "rewards/margins": 0.16537800431251526, "rewards/rejected": -0.368603378534317, "step": 3820 }, { "epoch": 10.461327857631758, "grad_norm": 3.7126357555389404, "learning_rate": 4.7657534246575344e-07, "log_odds_chosen": 2.8619961738586426, "log_odds_ratio": -0.19622927904129028, "logits/chosen": 0.9095598459243774, "logits/rejected": 0.9161025881767273, "logps/chosen": -2.3762011528015137, "logps/rejected": -5.1218461990356445, "loss": 0.6524, "nll_loss": 0.6327312588691711, "rewards/accuracies": 1.0, "rewards/chosen": -0.23762011528015137, "rewards/margins": 0.2745645046234131, "rewards/rejected": -0.5121846199035645, "step": 3821 }, { "epoch": 10.464065708418891, "grad_norm": 8.470797538757324, "learning_rate": 4.7643835616438354e-07, "log_odds_chosen": 1.2385613918304443, "log_odds_ratio": -0.5814136862754822, "logits/chosen": 1.0847442150115967, "logits/rejected": 1.0184667110443115, "logps/chosen": -1.9038565158843994, "logps/rejected": -2.921832799911499, "loss": 0.5743, "nll_loss": 0.5161381363868713, "rewards/accuracies": 0.875, "rewards/chosen": -0.190385639667511, "rewards/margins": 0.10179764032363892, "rewards/rejected": -0.2921832799911499, "step": 3822 }, { "epoch": 10.466803559206022, "grad_norm": 3.5740551948547363, "learning_rate": 4.763013698630137e-07, "log_odds_chosen": 3.764336109161377, "log_odds_ratio": -0.04743126779794693, "logits/chosen": 1.043031930923462, "logits/rejected": 1.051793098449707, "logps/chosen": -2.0632846355438232, "logps/rejected": -5.667315483093262, "loss": 0.6352, "nll_loss": 0.630418062210083, "rewards/accuracies": 1.0, "rewards/chosen": -0.20632845163345337, "rewards/margins": 0.36040306091308594, "rewards/rejected": -0.5667315125465393, "step": 3823 }, { "epoch": 10.469541409993155, "grad_norm": 5.003319263458252, "learning_rate": 4.761643835616438e-07, "log_odds_chosen": 1.2017345428466797, "log_odds_ratio": -0.7726209163665771, "logits/chosen": 1.230574131011963, "logits/rejected": 1.2001490592956543, "logps/chosen": -2.8344993591308594, "logps/rejected": -3.8840742111206055, "loss": 0.6467, "nll_loss": 0.5694437026977539, "rewards/accuracies": 0.875, "rewards/chosen": -0.2834499478340149, "rewards/margins": 0.10495748370885849, "rewards/rejected": -0.3884074091911316, "step": 3824 }, { "epoch": 10.472279260780287, "grad_norm": 3.366745948791504, "learning_rate": 4.7602739726027394e-07, "log_odds_chosen": 3.9957680702209473, "log_odds_ratio": -0.10901513695716858, "logits/chosen": 1.0436955690383911, "logits/rejected": 1.0922884941101074, "logps/chosen": -1.6322605609893799, "logps/rejected": -5.387100696563721, "loss": 0.5288, "nll_loss": 0.5179014205932617, "rewards/accuracies": 1.0, "rewards/chosen": -0.16322606801986694, "rewards/margins": 0.37548398971557617, "rewards/rejected": -0.5387100577354431, "step": 3825 }, { "epoch": 10.47501711156742, "grad_norm": 3.161161422729492, "learning_rate": 4.758904109589041e-07, "log_odds_chosen": 2.79390811920166, "log_odds_ratio": -0.1229846179485321, "logits/chosen": 1.0663864612579346, "logits/rejected": 1.1013367176055908, "logps/chosen": -1.9226762056350708, "logps/rejected": -4.511816024780273, "loss": 0.5349, "nll_loss": 0.5226074457168579, "rewards/accuracies": 1.0, "rewards/chosen": -0.19226762652397156, "rewards/margins": 0.2589139938354492, "rewards/rejected": -0.4511816203594208, "step": 3826 }, { "epoch": 10.477754962354553, "grad_norm": 4.151122093200684, "learning_rate": 4.757534246575342e-07, "log_odds_chosen": 3.9759175777435303, "log_odds_ratio": -0.2938905656337738, "logits/chosen": 1.2218265533447266, "logits/rejected": 1.2477943897247314, "logps/chosen": -1.6210947036743164, "logps/rejected": -5.352043628692627, "loss": 0.6315, "nll_loss": 0.602066695690155, "rewards/accuracies": 0.875, "rewards/chosen": -0.16210947930812836, "rewards/margins": 0.37309491634368896, "rewards/rejected": -0.5352044105529785, "step": 3827 }, { "epoch": 10.480492813141684, "grad_norm": 7.620760440826416, "learning_rate": 4.756164383561644e-07, "log_odds_chosen": 0.43172138929367065, "log_odds_ratio": -0.7344151735305786, "logits/chosen": 0.9134385585784912, "logits/rejected": 0.8422690033912659, "logps/chosen": -2.712160348892212, "logps/rejected": -2.9618358612060547, "loss": 0.6863, "nll_loss": 0.612821102142334, "rewards/accuracies": 0.75, "rewards/chosen": -0.2712160348892212, "rewards/margins": 0.024967540055513382, "rewards/rejected": -0.29618358612060547, "step": 3828 }, { "epoch": 10.483230663928817, "grad_norm": 3.1426050662994385, "learning_rate": 4.754794520547945e-07, "log_odds_chosen": 3.4811501502990723, "log_odds_ratio": -0.16957227885723114, "logits/chosen": 0.9986464977264404, "logits/rejected": 1.0441217422485352, "logps/chosen": -1.8798508644104004, "logps/rejected": -5.22567081451416, "loss": 0.6042, "nll_loss": 0.587279200553894, "rewards/accuracies": 1.0, "rewards/chosen": -0.18798506259918213, "rewards/margins": 0.33458197116851807, "rewards/rejected": -0.522567093372345, "step": 3829 }, { "epoch": 10.485968514715948, "grad_norm": 3.1372315883636475, "learning_rate": 4.7534246575342465e-07, "log_odds_chosen": 1.4778800010681152, "log_odds_ratio": -0.24358725547790527, "logits/chosen": 0.9263330698013306, "logits/rejected": 0.8800817131996155, "logps/chosen": -1.8002362251281738, "logps/rejected": -3.1192550659179688, "loss": 0.5371, "nll_loss": 0.5127223134040833, "rewards/accuracies": 1.0, "rewards/chosen": -0.18002362549304962, "rewards/margins": 0.13190189003944397, "rewards/rejected": -0.3119255304336548, "step": 3830 }, { "epoch": 10.48870636550308, "grad_norm": 4.48063850402832, "learning_rate": 4.7520547945205475e-07, "log_odds_chosen": 3.6589863300323486, "log_odds_ratio": -0.14171254634857178, "logits/chosen": 1.0136878490447998, "logits/rejected": 1.0579044818878174, "logps/chosen": -1.6439510583877563, "logps/rejected": -5.071314811706543, "loss": 0.7298, "nll_loss": 0.715648889541626, "rewards/accuracies": 1.0, "rewards/chosen": -0.16439509391784668, "rewards/margins": 0.3427364230155945, "rewards/rejected": -0.5071315169334412, "step": 3831 }, { "epoch": 10.491444216290212, "grad_norm": 5.837155342102051, "learning_rate": 4.750684931506849e-07, "log_odds_chosen": 0.6033614873886108, "log_odds_ratio": -0.4537007808685303, "logits/chosen": 0.8015255331993103, "logits/rejected": 0.7926934957504272, "logps/chosen": -3.3452491760253906, "logps/rejected": -3.911686658859253, "loss": 0.6603, "nll_loss": 0.6149505376815796, "rewards/accuracies": 0.875, "rewards/chosen": -0.334524929523468, "rewards/margins": 0.05664375051856041, "rewards/rejected": -0.3911686837673187, "step": 3832 }, { "epoch": 10.494182067077345, "grad_norm": 3.803818941116333, "learning_rate": 4.7493150684931505e-07, "log_odds_chosen": 2.2147912979125977, "log_odds_ratio": -0.3468206822872162, "logits/chosen": 0.9869402050971985, "logits/rejected": 0.978264570236206, "logps/chosen": -2.1117522716522217, "logps/rejected": -4.220251083374023, "loss": 0.5675, "nll_loss": 0.5328384041786194, "rewards/accuracies": 1.0, "rewards/chosen": -0.21117523312568665, "rewards/margins": 0.21084986627101898, "rewards/rejected": -0.42202508449554443, "step": 3833 }, { "epoch": 10.496919917864476, "grad_norm": 7.334997653961182, "learning_rate": 4.7479452054794515e-07, "log_odds_chosen": 1.5149784088134766, "log_odds_ratio": -0.2957848012447357, "logits/chosen": 0.9469501972198486, "logits/rejected": 0.9217379689216614, "logps/chosen": -2.031801223754883, "logps/rejected": -3.4186153411865234, "loss": 0.5664, "nll_loss": 0.5368135571479797, "rewards/accuracies": 0.875, "rewards/chosen": -0.20318011939525604, "rewards/margins": 0.13868144154548645, "rewards/rejected": -0.3418615460395813, "step": 3834 }, { "epoch": 10.499657768651609, "grad_norm": 3.7510626316070557, "learning_rate": 4.7465753424657536e-07, "log_odds_chosen": 2.4991581439971924, "log_odds_ratio": -0.2140813171863556, "logits/chosen": 0.9002240896224976, "logits/rejected": 0.8314429521560669, "logps/chosen": -1.892501711845398, "logps/rejected": -4.194032669067383, "loss": 0.5781, "nll_loss": 0.556723952293396, "rewards/accuracies": 1.0, "rewards/chosen": -0.1892501711845398, "rewards/margins": 0.2301531434059143, "rewards/rejected": -0.4194032847881317, "step": 3835 }, { "epoch": 10.50239561943874, "grad_norm": 3.3378360271453857, "learning_rate": 4.7452054794520546e-07, "log_odds_chosen": 2.8969340324401855, "log_odds_ratio": -0.1844193935394287, "logits/chosen": 0.7944822311401367, "logits/rejected": 0.8239744901657104, "logps/chosen": -2.1699016094207764, "logps/rejected": -4.893401145935059, "loss": 0.5901, "nll_loss": 0.5716453790664673, "rewards/accuracies": 1.0, "rewards/chosen": -0.2169901728630066, "rewards/margins": 0.2723499536514282, "rewards/rejected": -0.4893401265144348, "step": 3836 }, { "epoch": 10.505133470225873, "grad_norm": 3.986280679702759, "learning_rate": 4.743835616438356e-07, "log_odds_chosen": 1.3582481145858765, "log_odds_ratio": -0.2618557810783386, "logits/chosen": 1.0028932094573975, "logits/rejected": 0.8555933237075806, "logps/chosen": -1.89173424243927, "logps/rejected": -3.0965728759765625, "loss": 0.5872, "nll_loss": 0.5610294342041016, "rewards/accuracies": 1.0, "rewards/chosen": -0.18917343020439148, "rewards/margins": 0.12048386037349701, "rewards/rejected": -0.3096572756767273, "step": 3837 }, { "epoch": 10.507871321013004, "grad_norm": 3.578543186187744, "learning_rate": 4.742465753424657e-07, "log_odds_chosen": 4.339549541473389, "log_odds_ratio": -0.09249342232942581, "logits/chosen": 0.8389934301376343, "logits/rejected": 0.7324084639549255, "logps/chosen": -1.9686717987060547, "logps/rejected": -6.122440338134766, "loss": 0.6907, "nll_loss": 0.681463897228241, "rewards/accuracies": 1.0, "rewards/chosen": -0.1968671828508377, "rewards/margins": 0.4153768718242645, "rewards/rejected": -0.6122440695762634, "step": 3838 }, { "epoch": 10.510609171800137, "grad_norm": 2.9389214515686035, "learning_rate": 4.7410958904109586e-07, "log_odds_chosen": 4.605415344238281, "log_odds_ratio": -0.11611559987068176, "logits/chosen": 1.0476056337356567, "logits/rejected": 1.0675190687179565, "logps/chosen": -2.224972724914551, "logps/rejected": -6.711421012878418, "loss": 0.598, "nll_loss": 0.5863492488861084, "rewards/accuracies": 1.0, "rewards/chosen": -0.22249728441238403, "rewards/margins": 0.44864481687545776, "rewards/rejected": -0.6711421012878418, "step": 3839 }, { "epoch": 10.513347022587268, "grad_norm": 3.761887550354004, "learning_rate": 4.73972602739726e-07, "log_odds_chosen": 3.6123528480529785, "log_odds_ratio": -0.15677599608898163, "logits/chosen": 0.9372275471687317, "logits/rejected": 0.9500558972358704, "logps/chosen": -2.3179492950439453, "logps/rejected": -5.752443313598633, "loss": 0.613, "nll_loss": 0.5973098278045654, "rewards/accuracies": 1.0, "rewards/chosen": -0.23179495334625244, "rewards/margins": 0.3434494137763977, "rewards/rejected": -0.5752443671226501, "step": 3840 }, { "epoch": 10.516084873374401, "grad_norm": 3.108071804046631, "learning_rate": 4.738356164383561e-07, "log_odds_chosen": 2.6629691123962402, "log_odds_ratio": -0.17956951260566711, "logits/chosen": 1.0188124179840088, "logits/rejected": 0.9038434028625488, "logps/chosen": -1.3108881711959839, "logps/rejected": -3.6656901836395264, "loss": 0.6193, "nll_loss": 0.6013055443763733, "rewards/accuracies": 1.0, "rewards/chosen": -0.13108882308006287, "rewards/margins": 0.23548021912574768, "rewards/rejected": -0.36656904220581055, "step": 3841 }, { "epoch": 10.518822724161533, "grad_norm": 3.188804864883423, "learning_rate": 4.736986301369863e-07, "log_odds_chosen": 2.8284430503845215, "log_odds_ratio": -0.14367443323135376, "logits/chosen": 0.7883724570274353, "logits/rejected": 0.8289762139320374, "logps/chosen": -1.7794263362884521, "logps/rejected": -4.3777079582214355, "loss": 0.6748, "nll_loss": 0.6604310870170593, "rewards/accuracies": 1.0, "rewards/chosen": -0.17794263362884521, "rewards/margins": 0.2598281502723694, "rewards/rejected": -0.437770813703537, "step": 3842 }, { "epoch": 10.521560574948666, "grad_norm": 3.386878728866577, "learning_rate": 4.735616438356164e-07, "log_odds_chosen": 2.209336996078491, "log_odds_ratio": -0.27562299370765686, "logits/chosen": 0.9110835790634155, "logits/rejected": 0.8779705166816711, "logps/chosen": -2.1169121265411377, "logps/rejected": -4.231302261352539, "loss": 0.6402, "nll_loss": 0.6126211881637573, "rewards/accuracies": 1.0, "rewards/chosen": -0.2116912305355072, "rewards/margins": 0.21143902838230133, "rewards/rejected": -0.4231302738189697, "step": 3843 }, { "epoch": 10.524298425735797, "grad_norm": 3.626222610473633, "learning_rate": 4.734246575342465e-07, "log_odds_chosen": 2.6277995109558105, "log_odds_ratio": -0.15266960859298706, "logits/chosen": 0.9716222286224365, "logits/rejected": 1.0116803646087646, "logps/chosen": -2.563453197479248, "logps/rejected": -5.084497928619385, "loss": 0.7455, "nll_loss": 0.7302650213241577, "rewards/accuracies": 1.0, "rewards/chosen": -0.25634533166885376, "rewards/margins": 0.2521044909954071, "rewards/rejected": -0.5084497928619385, "step": 3844 }, { "epoch": 10.52703627652293, "grad_norm": 3.3875880241394043, "learning_rate": 4.732876712328767e-07, "log_odds_chosen": 2.7857415676116943, "log_odds_ratio": -0.17762117087841034, "logits/chosen": 1.109547734260559, "logits/rejected": 1.1480615139007568, "logps/chosen": -1.8622442483901978, "logps/rejected": -4.464250087738037, "loss": 0.5972, "nll_loss": 0.5794557929039001, "rewards/accuracies": 1.0, "rewards/chosen": -0.18622443079948425, "rewards/margins": 0.2602005898952484, "rewards/rejected": -0.44642502069473267, "step": 3845 }, { "epoch": 10.529774127310061, "grad_norm": 3.1580567359924316, "learning_rate": 4.731506849315068e-07, "log_odds_chosen": 1.996032953262329, "log_odds_ratio": -0.18095481395721436, "logits/chosen": 0.8770128488540649, "logits/rejected": 0.9028576612472534, "logps/chosen": -2.1396994590759277, "logps/rejected": -3.968991279602051, "loss": 0.56, "nll_loss": 0.5418635606765747, "rewards/accuracies": 1.0, "rewards/chosen": -0.21396994590759277, "rewards/margins": 0.18292918801307678, "rewards/rejected": -0.39689916372299194, "step": 3846 }, { "epoch": 10.532511978097194, "grad_norm": 5.667369365692139, "learning_rate": 4.7301369863013697e-07, "log_odds_chosen": 1.447322130203247, "log_odds_ratio": -0.6185586452484131, "logits/chosen": 0.8984185457229614, "logits/rejected": 0.833493173122406, "logps/chosen": -2.3311357498168945, "logps/rejected": -3.6491339206695557, "loss": 0.7143, "nll_loss": 0.6524525284767151, "rewards/accuracies": 0.875, "rewards/chosen": -0.23311357200145721, "rewards/margins": 0.13179980218410492, "rewards/rejected": -0.3649134039878845, "step": 3847 }, { "epoch": 10.535249828884325, "grad_norm": 3.48704195022583, "learning_rate": 4.7287671232876707e-07, "log_odds_chosen": 2.3182499408721924, "log_odds_ratio": -0.19605201482772827, "logits/chosen": 0.9400652050971985, "logits/rejected": 0.8601200580596924, "logps/chosen": -1.7891473770141602, "logps/rejected": -3.91776180267334, "loss": 0.602, "nll_loss": 0.5824292898178101, "rewards/accuracies": 1.0, "rewards/chosen": -0.17891474068164825, "rewards/margins": 0.21286143362522125, "rewards/rejected": -0.3917761743068695, "step": 3848 }, { "epoch": 10.537987679671458, "grad_norm": 3.8786871433258057, "learning_rate": 4.727397260273973e-07, "log_odds_chosen": 2.209947109222412, "log_odds_ratio": -0.360401451587677, "logits/chosen": 1.0106706619262695, "logits/rejected": 0.9721925258636475, "logps/chosen": -1.9695888757705688, "logps/rejected": -3.9930338859558105, "loss": 0.6948, "nll_loss": 0.6587269902229309, "rewards/accuracies": 0.875, "rewards/chosen": -0.19695889949798584, "rewards/margins": 0.20234449207782745, "rewards/rejected": -0.3993034064769745, "step": 3849 }, { "epoch": 10.54072553045859, "grad_norm": 3.300670623779297, "learning_rate": 4.726027397260274e-07, "log_odds_chosen": 3.255375862121582, "log_odds_ratio": -0.19019633531570435, "logits/chosen": 1.0493682622909546, "logits/rejected": 1.0126866102218628, "logps/chosen": -1.7051149606704712, "logps/rejected": -4.742503643035889, "loss": 0.6511, "nll_loss": 0.6321189403533936, "rewards/accuracies": 1.0, "rewards/chosen": -0.17051148414611816, "rewards/margins": 0.30373892188072205, "rewards/rejected": -0.4742503762245178, "step": 3850 }, { "epoch": 10.543463381245722, "grad_norm": 3.3079657554626465, "learning_rate": 4.724657534246575e-07, "log_odds_chosen": 2.4610865116119385, "log_odds_ratio": -0.20691964030265808, "logits/chosen": 1.0623348951339722, "logits/rejected": 1.0912038087844849, "logps/chosen": -1.6647439002990723, "logps/rejected": -3.9007740020751953, "loss": 0.633, "nll_loss": 0.6123343706130981, "rewards/accuracies": 0.875, "rewards/chosen": -0.16647440195083618, "rewards/margins": 0.22360298037528992, "rewards/rejected": -0.3900773823261261, "step": 3851 }, { "epoch": 10.546201232032853, "grad_norm": 5.1981282234191895, "learning_rate": 4.723287671232877e-07, "log_odds_chosen": 2.0305097103118896, "log_odds_ratio": -0.2222082018852234, "logits/chosen": 0.9608327746391296, "logits/rejected": 0.9509332180023193, "logps/chosen": -1.9222445487976074, "logps/rejected": -3.830352306365967, "loss": 0.7696, "nll_loss": 0.7473488450050354, "rewards/accuracies": 1.0, "rewards/chosen": -0.19222445785999298, "rewards/margins": 0.19081078469753265, "rewards/rejected": -0.38303524255752563, "step": 3852 }, { "epoch": 10.548939082819986, "grad_norm": 3.9344563484191895, "learning_rate": 4.721917808219178e-07, "log_odds_chosen": 2.8557679653167725, "log_odds_ratio": -0.2754591405391693, "logits/chosen": 0.770613431930542, "logits/rejected": 0.7286529541015625, "logps/chosen": -1.9355155229568481, "logps/rejected": -4.675329208374023, "loss": 0.564, "nll_loss": 0.5364910364151001, "rewards/accuracies": 1.0, "rewards/chosen": -0.19355155527591705, "rewards/margins": 0.27398133277893066, "rewards/rejected": -0.4675329029560089, "step": 3853 }, { "epoch": 10.55167693360712, "grad_norm": 3.3404171466827393, "learning_rate": 4.7205479452054793e-07, "log_odds_chosen": 3.243879795074463, "log_odds_ratio": -0.19289487600326538, "logits/chosen": 1.0525715351104736, "logits/rejected": 1.113405466079712, "logps/chosen": -1.1377007961273193, "logps/rejected": -3.9456396102905273, "loss": 0.5219, "nll_loss": 0.5025841593742371, "rewards/accuracies": 0.875, "rewards/chosen": -0.11377008259296417, "rewards/margins": 0.28079384565353394, "rewards/rejected": -0.3945639729499817, "step": 3854 }, { "epoch": 10.55441478439425, "grad_norm": 3.4770150184631348, "learning_rate": 4.7191780821917803e-07, "log_odds_chosen": 1.5459561347961426, "log_odds_ratio": -0.26105740666389465, "logits/chosen": 1.0687625408172607, "logits/rejected": 1.0084640979766846, "logps/chosen": -1.9608831405639648, "logps/rejected": -3.3786487579345703, "loss": 0.5621, "nll_loss": 0.535957932472229, "rewards/accuracies": 1.0, "rewards/chosen": -0.19608831405639648, "rewards/margins": 0.14177659153938293, "rewards/rejected": -0.3378649353981018, "step": 3855 }, { "epoch": 10.557152635181383, "grad_norm": 3.3746821880340576, "learning_rate": 4.7178082191780823e-07, "log_odds_chosen": 3.20406174659729, "log_odds_ratio": -0.16478782892227173, "logits/chosen": 1.0976860523223877, "logits/rejected": 1.0851112604141235, "logps/chosen": -2.2013931274414062, "logps/rejected": -5.279725551605225, "loss": 0.6288, "nll_loss": 0.6123133301734924, "rewards/accuracies": 1.0, "rewards/chosen": -0.2201393097639084, "rewards/margins": 0.3078332543373108, "rewards/rejected": -0.5279725790023804, "step": 3856 }, { "epoch": 10.559890485968515, "grad_norm": 3.5613820552825928, "learning_rate": 4.7164383561643833e-07, "log_odds_chosen": 2.2816691398620605, "log_odds_ratio": -0.24428677558898926, "logits/chosen": 0.8766202926635742, "logits/rejected": 0.8383151888847351, "logps/chosen": -1.655666470527649, "logps/rejected": -3.7421631813049316, "loss": 0.5656, "nll_loss": 0.5411972403526306, "rewards/accuracies": 1.0, "rewards/chosen": -0.16556665301322937, "rewards/margins": 0.208649680018425, "rewards/rejected": -0.37421631813049316, "step": 3857 }, { "epoch": 10.562628336755647, "grad_norm": 3.791788339614868, "learning_rate": 4.7150684931506843e-07, "log_odds_chosen": 2.094381332397461, "log_odds_ratio": -0.20170056819915771, "logits/chosen": 1.049353837966919, "logits/rejected": 1.1071282625198364, "logps/chosen": -2.7880144119262695, "logps/rejected": -4.781955718994141, "loss": 0.7143, "nll_loss": 0.6941577196121216, "rewards/accuracies": 1.0, "rewards/chosen": -0.27880144119262695, "rewards/margins": 0.19939415156841278, "rewards/rejected": -0.4781956076622009, "step": 3858 }, { "epoch": 10.565366187542779, "grad_norm": 4.212484836578369, "learning_rate": 4.7136986301369864e-07, "log_odds_chosen": 2.476783275604248, "log_odds_ratio": -0.22062236070632935, "logits/chosen": 0.9729402661323547, "logits/rejected": 0.996819257736206, "logps/chosen": -1.8604052066802979, "logps/rejected": -4.119101047515869, "loss": 0.6442, "nll_loss": 0.6221302151679993, "rewards/accuracies": 0.875, "rewards/chosen": -0.18604052066802979, "rewards/margins": 0.22586959600448608, "rewards/rejected": -0.41191014647483826, "step": 3859 }, { "epoch": 10.568104038329912, "grad_norm": 4.118995189666748, "learning_rate": 4.7123287671232874e-07, "log_odds_chosen": 1.347071647644043, "log_odds_ratio": -0.4300405979156494, "logits/chosen": 0.9372501373291016, "logits/rejected": 0.8928902745246887, "logps/chosen": -1.7628936767578125, "logps/rejected": -2.8771157264709473, "loss": 0.5763, "nll_loss": 0.5333397388458252, "rewards/accuracies": 0.75, "rewards/chosen": -0.176289364695549, "rewards/margins": 0.11142219603061676, "rewards/rejected": -0.28771156072616577, "step": 3860 }, { "epoch": 10.570841889117043, "grad_norm": 6.6686248779296875, "learning_rate": 4.710958904109589e-07, "log_odds_chosen": 2.5799174308776855, "log_odds_ratio": -0.4546216130256653, "logits/chosen": 1.0655323266983032, "logits/rejected": 1.063831090927124, "logps/chosen": -2.095031261444092, "logps/rejected": -4.410616874694824, "loss": 0.6806, "nll_loss": 0.6351239681243896, "rewards/accuracies": 0.875, "rewards/chosen": -0.2095031440258026, "rewards/margins": 0.23155859112739563, "rewards/rejected": -0.44106170535087585, "step": 3861 }, { "epoch": 10.573579739904176, "grad_norm": 3.7068207263946533, "learning_rate": 4.70958904109589e-07, "log_odds_chosen": 3.804265022277832, "log_odds_ratio": -0.17700399458408356, "logits/chosen": 0.9787046909332275, "logits/rejected": 1.007101058959961, "logps/chosen": -2.578469753265381, "logps/rejected": -6.109358787536621, "loss": 0.6287, "nll_loss": 0.6109727025032043, "rewards/accuracies": 0.875, "rewards/chosen": -0.25784698128700256, "rewards/margins": 0.3530888855457306, "rewards/rejected": -0.6109359264373779, "step": 3862 }, { "epoch": 10.576317590691307, "grad_norm": 6.383683204650879, "learning_rate": 4.708219178082192e-07, "log_odds_chosen": 1.9502394199371338, "log_odds_ratio": -0.4688854217529297, "logits/chosen": 1.0756597518920898, "logits/rejected": 1.0188194513320923, "logps/chosen": -1.917661428451538, "logps/rejected": -3.763150215148926, "loss": 0.6086, "nll_loss": 0.5617002248764038, "rewards/accuracies": 0.625, "rewards/chosen": -0.191766157746315, "rewards/margins": 0.18454886972904205, "rewards/rejected": -0.37631499767303467, "step": 3863 }, { "epoch": 10.57905544147844, "grad_norm": 7.183218002319336, "learning_rate": 4.706849315068493e-07, "log_odds_chosen": 1.0667150020599365, "log_odds_ratio": -0.4934092164039612, "logits/chosen": 1.1845378875732422, "logits/rejected": 1.1821925640106201, "logps/chosen": -2.049349308013916, "logps/rejected": -3.035291910171509, "loss": 0.6717, "nll_loss": 0.6223539113998413, "rewards/accuracies": 0.75, "rewards/chosen": -0.20493492484092712, "rewards/margins": 0.09859427064657211, "rewards/rejected": -0.30352920293807983, "step": 3864 }, { "epoch": 10.581793292265571, "grad_norm": 3.679589033126831, "learning_rate": 4.705479452054794e-07, "log_odds_chosen": 2.110292434692383, "log_odds_ratio": -0.2831577658653259, "logits/chosen": 1.214744210243225, "logits/rejected": 1.1845614910125732, "logps/chosen": -1.9945073127746582, "logps/rejected": -3.9284286499023438, "loss": 0.6313, "nll_loss": 0.6030205488204956, "rewards/accuracies": 0.875, "rewards/chosen": -0.19945074617862701, "rewards/margins": 0.19339212775230408, "rewards/rejected": -0.3928428590297699, "step": 3865 }, { "epoch": 10.584531143052704, "grad_norm": 4.275328636169434, "learning_rate": 4.704109589041096e-07, "log_odds_chosen": 2.7753243446350098, "log_odds_ratio": -0.11353904753923416, "logits/chosen": 1.0843796730041504, "logits/rejected": 1.0914667844772339, "logps/chosen": -1.8654240369796753, "logps/rejected": -4.403270244598389, "loss": 0.613, "nll_loss": 0.6016604900360107, "rewards/accuracies": 1.0, "rewards/chosen": -0.18654242157936096, "rewards/margins": 0.2537845969200134, "rewards/rejected": -0.440326988697052, "step": 3866 }, { "epoch": 10.587268993839835, "grad_norm": 6.295995712280273, "learning_rate": 4.702739726027397e-07, "log_odds_chosen": 1.7722259759902954, "log_odds_ratio": -0.6401998996734619, "logits/chosen": 1.009912371635437, "logits/rejected": 0.9081664681434631, "logps/chosen": -1.8019877672195435, "logps/rejected": -3.4294228553771973, "loss": 0.6545, "nll_loss": 0.5904659032821655, "rewards/accuracies": 0.875, "rewards/chosen": -0.1801987886428833, "rewards/margins": 0.1627434939146042, "rewards/rejected": -0.3429422974586487, "step": 3867 }, { "epoch": 10.590006844626968, "grad_norm": 3.068608045578003, "learning_rate": 4.7013698630136985e-07, "log_odds_chosen": 2.68005633354187, "log_odds_ratio": -0.15988706052303314, "logits/chosen": 1.369361162185669, "logits/rejected": 1.3819234371185303, "logps/chosen": -1.969307780265808, "logps/rejected": -4.530486583709717, "loss": 0.5734, "nll_loss": 0.5573968291282654, "rewards/accuracies": 1.0, "rewards/chosen": -0.19693079590797424, "rewards/margins": 0.25611788034439087, "rewards/rejected": -0.4530487060546875, "step": 3868 }, { "epoch": 10.5927446954141, "grad_norm": 3.3775691986083984, "learning_rate": 4.6999999999999995e-07, "log_odds_chosen": 2.654261589050293, "log_odds_ratio": -0.18643596768379211, "logits/chosen": 0.9607977867126465, "logits/rejected": 0.9884375333786011, "logps/chosen": -2.955684185028076, "logps/rejected": -5.439470291137695, "loss": 0.6376, "nll_loss": 0.6189929842948914, "rewards/accuracies": 1.0, "rewards/chosen": -0.29556840658187866, "rewards/margins": 0.24837863445281982, "rewards/rejected": -0.5439470410346985, "step": 3869 }, { "epoch": 10.595482546201232, "grad_norm": 5.317183494567871, "learning_rate": 4.6986301369863015e-07, "log_odds_chosen": 1.8449698686599731, "log_odds_ratio": -0.3122628331184387, "logits/chosen": 0.7856642007827759, "logits/rejected": 0.7739571332931519, "logps/chosen": -2.9936459064483643, "logps/rejected": -4.724048614501953, "loss": 0.6548, "nll_loss": 0.623595654964447, "rewards/accuracies": 0.875, "rewards/chosen": -0.2993645966053009, "rewards/margins": 0.17304030060768127, "rewards/rejected": -0.4724048972129822, "step": 3870 }, { "epoch": 10.598220396988363, "grad_norm": 2.9898574352264404, "learning_rate": 4.6972602739726025e-07, "log_odds_chosen": 5.132320404052734, "log_odds_ratio": -0.07151682674884796, "logits/chosen": 1.2203049659729004, "logits/rejected": 1.2360209226608276, "logps/chosen": -1.7938406467437744, "logps/rejected": -6.6393022537231445, "loss": 0.6296, "nll_loss": 0.6224018931388855, "rewards/accuracies": 1.0, "rewards/chosen": -0.1793840527534485, "rewards/margins": 0.4845461845397949, "rewards/rejected": -0.6639302372932434, "step": 3871 }, { "epoch": 10.600958247775496, "grad_norm": 4.197479724884033, "learning_rate": 4.6958904109589035e-07, "log_odds_chosen": 1.8484892845153809, "log_odds_ratio": -0.34225842356681824, "logits/chosen": 0.9191166162490845, "logits/rejected": 0.951971173286438, "logps/chosen": -2.8370213508605957, "logps/rejected": -4.6140007972717285, "loss": 0.6766, "nll_loss": 0.6423584222793579, "rewards/accuracies": 0.875, "rewards/chosen": -0.28370213508605957, "rewards/margins": 0.17769795656204224, "rewards/rejected": -0.4614000916481018, "step": 3872 }, { "epoch": 10.603696098562628, "grad_norm": 3.948352098464966, "learning_rate": 4.6945205479452056e-07, "log_odds_chosen": 2.686681032180786, "log_odds_ratio": -0.21185851097106934, "logits/chosen": 0.8002941608428955, "logits/rejected": 0.7750985026359558, "logps/chosen": -1.8584465980529785, "logps/rejected": -4.365386962890625, "loss": 0.7084, "nll_loss": 0.6872444748878479, "rewards/accuracies": 0.875, "rewards/chosen": -0.18584467470645905, "rewards/margins": 0.25069403648376465, "rewards/rejected": -0.4365387260913849, "step": 3873 }, { "epoch": 10.60643394934976, "grad_norm": 3.4701552391052246, "learning_rate": 4.6931506849315065e-07, "log_odds_chosen": 2.0091214179992676, "log_odds_ratio": -0.25788068771362305, "logits/chosen": 0.9642614722251892, "logits/rejected": 0.9143254160881042, "logps/chosen": -1.5942561626434326, "logps/rejected": -3.3099584579467773, "loss": 0.6151, "nll_loss": 0.5892708897590637, "rewards/accuracies": 1.0, "rewards/chosen": -0.15942561626434326, "rewards/margins": 0.17157024145126343, "rewards/rejected": -0.3309958577156067, "step": 3874 }, { "epoch": 10.609171800136892, "grad_norm": 3.338205099105835, "learning_rate": 4.691780821917808e-07, "log_odds_chosen": 2.7842395305633545, "log_odds_ratio": -0.3177522122859955, "logits/chosen": 0.7822790741920471, "logits/rejected": 0.7831874489784241, "logps/chosen": -2.1771585941314697, "logps/rejected": -4.824683666229248, "loss": 0.6185, "nll_loss": 0.5867676734924316, "rewards/accuracies": 0.875, "rewards/chosen": -0.21771585941314697, "rewards/margins": 0.26475250720977783, "rewards/rejected": -0.4824683666229248, "step": 3875 }, { "epoch": 10.611909650924025, "grad_norm": 3.899831771850586, "learning_rate": 4.6904109589041096e-07, "log_odds_chosen": 2.1700439453125, "log_odds_ratio": -0.27762117981910706, "logits/chosen": 1.0779399871826172, "logits/rejected": 1.0843300819396973, "logps/chosen": -2.184138774871826, "logps/rejected": -4.195104122161865, "loss": 0.6003, "nll_loss": 0.5725871920585632, "rewards/accuracies": 0.875, "rewards/chosen": -0.21841388940811157, "rewards/margins": 0.20109649002552032, "rewards/rejected": -0.4195103943347931, "step": 3876 }, { "epoch": 10.614647501711158, "grad_norm": 4.314558982849121, "learning_rate": 4.689041095890411e-07, "log_odds_chosen": 1.2381401062011719, "log_odds_ratio": -0.34003350138664246, "logits/chosen": 1.084413766860962, "logits/rejected": 0.9867089986801147, "logps/chosen": -1.121719479560852, "logps/rejected": -2.136455774307251, "loss": 0.485, "nll_loss": 0.4510315954685211, "rewards/accuracies": 1.0, "rewards/chosen": -0.1121719554066658, "rewards/margins": 0.1014736220240593, "rewards/rejected": -0.2136455923318863, "step": 3877 }, { "epoch": 10.617385352498289, "grad_norm": 3.4283607006073, "learning_rate": 4.687671232876712e-07, "log_odds_chosen": 4.570305347442627, "log_odds_ratio": -0.12719906866550446, "logits/chosen": 0.8989956378936768, "logits/rejected": 0.8984206318855286, "logps/chosen": -2.3688793182373047, "logps/rejected": -6.8151044845581055, "loss": 0.6729, "nll_loss": 0.6602262854576111, "rewards/accuracies": 1.0, "rewards/chosen": -0.23688793182373047, "rewards/margins": 0.44462254643440247, "rewards/rejected": -0.6815104484558105, "step": 3878 }, { "epoch": 10.62012320328542, "grad_norm": 3.941387414932251, "learning_rate": 4.686301369863013e-07, "log_odds_chosen": 2.329742670059204, "log_odds_ratio": -0.4665960371494293, "logits/chosen": 1.3219506740570068, "logits/rejected": 1.385585069656372, "logps/chosen": -2.393087387084961, "logps/rejected": -4.612662315368652, "loss": 0.6644, "nll_loss": 0.6177661418914795, "rewards/accuracies": 0.875, "rewards/chosen": -0.23930872976779938, "rewards/margins": 0.2219574749469757, "rewards/rejected": -0.4612661898136139, "step": 3879 }, { "epoch": 10.622861054072553, "grad_norm": 3.3171353340148926, "learning_rate": 4.684931506849315e-07, "log_odds_chosen": 2.7104105949401855, "log_odds_ratio": -0.12535998225212097, "logits/chosen": 0.9431912302970886, "logits/rejected": 0.9783361554145813, "logps/chosen": -1.7784440517425537, "logps/rejected": -4.278298377990723, "loss": 0.5577, "nll_loss": 0.5451837778091431, "rewards/accuracies": 1.0, "rewards/chosen": -0.17784440517425537, "rewards/margins": 0.24998541176319122, "rewards/rejected": -0.4278298318386078, "step": 3880 }, { "epoch": 10.625598904859686, "grad_norm": 3.49760365486145, "learning_rate": 4.683561643835616e-07, "log_odds_chosen": 3.0927348136901855, "log_odds_ratio": -0.09985524415969849, "logits/chosen": 0.7426865100860596, "logits/rejected": 0.5233291387557983, "logps/chosen": -1.5013313293457031, "logps/rejected": -4.323939323425293, "loss": 0.565, "nll_loss": 0.5550590753555298, "rewards/accuracies": 1.0, "rewards/chosen": -0.1501331329345703, "rewards/margins": 0.28226083517074585, "rewards/rejected": -0.43239399790763855, "step": 3881 }, { "epoch": 10.628336755646817, "grad_norm": 3.4615378379821777, "learning_rate": 4.6821917808219177e-07, "log_odds_chosen": 2.422240734100342, "log_odds_ratio": -0.24581637978553772, "logits/chosen": 1.021554946899414, "logits/rejected": 1.0099539756774902, "logps/chosen": -1.7856261730194092, "logps/rejected": -4.079781532287598, "loss": 0.5575, "nll_loss": 0.5329154133796692, "rewards/accuracies": 0.875, "rewards/chosen": -0.17856262624263763, "rewards/margins": 0.22941556572914124, "rewards/rejected": -0.4079781472682953, "step": 3882 }, { "epoch": 10.63107460643395, "grad_norm": 3.5520713329315186, "learning_rate": 4.680821917808219e-07, "log_odds_chosen": 1.3141056299209595, "log_odds_ratio": -0.2913077473640442, "logits/chosen": 0.8646724224090576, "logits/rejected": 0.8073567748069763, "logps/chosen": -1.9247004985809326, "logps/rejected": -3.113550901412964, "loss": 0.5684, "nll_loss": 0.5392626523971558, "rewards/accuracies": 1.0, "rewards/chosen": -0.19247004389762878, "rewards/margins": 0.11888503283262253, "rewards/rejected": -0.3113551139831543, "step": 3883 }, { "epoch": 10.633812457221081, "grad_norm": 3.5557668209075928, "learning_rate": 4.6794520547945207e-07, "log_odds_chosen": 1.630378007888794, "log_odds_ratio": -0.23472818732261658, "logits/chosen": 0.8055918216705322, "logits/rejected": 0.6004941463470459, "logps/chosen": -1.5200399160385132, "logps/rejected": -2.889284133911133, "loss": 0.5499, "nll_loss": 0.526419997215271, "rewards/accuracies": 1.0, "rewards/chosen": -0.15200400352478027, "rewards/margins": 0.13692441582679749, "rewards/rejected": -0.28892841935157776, "step": 3884 }, { "epoch": 10.636550308008214, "grad_norm": 3.8128392696380615, "learning_rate": 4.6780821917808217e-07, "log_odds_chosen": 3.0265071392059326, "log_odds_ratio": -0.17786046862602234, "logits/chosen": 0.9139970541000366, "logits/rejected": 0.8727864027023315, "logps/chosen": -1.736546277999878, "logps/rejected": -4.535146713256836, "loss": 0.633, "nll_loss": 0.6151939630508423, "rewards/accuracies": 1.0, "rewards/chosen": -0.17365464568138123, "rewards/margins": 0.2798600494861603, "rewards/rejected": -0.4535146653652191, "step": 3885 }, { "epoch": 10.639288158795345, "grad_norm": 2.9647860527038574, "learning_rate": 4.6767123287671227e-07, "log_odds_chosen": 4.68070650100708, "log_odds_ratio": -0.15681356191635132, "logits/chosen": 0.8081648349761963, "logits/rejected": 0.8558415770530701, "logps/chosen": -1.9046134948730469, "logps/rejected": -6.430219650268555, "loss": 0.6224, "nll_loss": 0.6067225933074951, "rewards/accuracies": 0.875, "rewards/chosen": -0.19046135246753693, "rewards/margins": 0.45256054401397705, "rewards/rejected": -0.6430219411849976, "step": 3886 }, { "epoch": 10.642026009582478, "grad_norm": 3.2364683151245117, "learning_rate": 4.6753424657534247e-07, "log_odds_chosen": 2.9216408729553223, "log_odds_ratio": -0.22841423749923706, "logits/chosen": 1.0890028476715088, "logits/rejected": 1.0918543338775635, "logps/chosen": -1.6349083185195923, "logps/rejected": -4.391106128692627, "loss": 0.6394, "nll_loss": 0.6165845394134521, "rewards/accuracies": 1.0, "rewards/chosen": -0.16349083185195923, "rewards/margins": 0.275619775056839, "rewards/rejected": -0.4391106069087982, "step": 3887 }, { "epoch": 10.64476386036961, "grad_norm": 3.519141674041748, "learning_rate": 4.6739726027397257e-07, "log_odds_chosen": 2.7188801765441895, "log_odds_ratio": -0.1954602301120758, "logits/chosen": 0.9069342017173767, "logits/rejected": 0.9482104778289795, "logps/chosen": -2.4481396675109863, "logps/rejected": -5.063161373138428, "loss": 0.5722, "nll_loss": 0.5526071786880493, "rewards/accuracies": 1.0, "rewards/chosen": -0.2448139637708664, "rewards/margins": 0.2615021765232086, "rewards/rejected": -0.5063161253929138, "step": 3888 }, { "epoch": 10.647501711156742, "grad_norm": 4.327538967132568, "learning_rate": 4.672602739726027e-07, "log_odds_chosen": 3.809232234954834, "log_odds_ratio": -0.13917669653892517, "logits/chosen": 1.0400248765945435, "logits/rejected": 1.052564024925232, "logps/chosen": -2.366373300552368, "logps/rejected": -5.988656044006348, "loss": 0.7076, "nll_loss": 0.6936753988265991, "rewards/accuracies": 0.875, "rewards/chosen": -0.23663733899593353, "rewards/margins": 0.36222824454307556, "rewards/rejected": -0.5988655686378479, "step": 3889 }, { "epoch": 10.650239561943874, "grad_norm": 5.08430814743042, "learning_rate": 4.671232876712329e-07, "log_odds_chosen": 1.1620728969573975, "log_odds_ratio": -0.4843074679374695, "logits/chosen": 0.9715718030929565, "logits/rejected": 0.828734278678894, "logps/chosen": -2.156790018081665, "logps/rejected": -3.164572238922119, "loss": 0.6372, "nll_loss": 0.5888156890869141, "rewards/accuracies": 0.875, "rewards/chosen": -0.21567900478839874, "rewards/margins": 0.1007782369852066, "rewards/rejected": -0.31645724177360535, "step": 3890 }, { "epoch": 10.652977412731007, "grad_norm": 3.9782936573028564, "learning_rate": 4.66986301369863e-07, "log_odds_chosen": 1.4544596672058105, "log_odds_ratio": -0.3191435933113098, "logits/chosen": 0.9899165630340576, "logits/rejected": 1.012141227722168, "logps/chosen": -2.17702317237854, "logps/rejected": -3.4526872634887695, "loss": 0.5616, "nll_loss": 0.5296786427497864, "rewards/accuracies": 1.0, "rewards/chosen": -0.21770232915878296, "rewards/margins": 0.127566397190094, "rewards/rejected": -0.34526872634887695, "step": 3891 }, { "epoch": 10.655715263518138, "grad_norm": 3.36592173576355, "learning_rate": 4.6684931506849313e-07, "log_odds_chosen": 2.936316967010498, "log_odds_ratio": -0.12683837115764618, "logits/chosen": 1.1249611377716064, "logits/rejected": 1.1132383346557617, "logps/chosen": -1.9562844038009644, "logps/rejected": -4.727453231811523, "loss": 0.5491, "nll_loss": 0.5363742113113403, "rewards/accuracies": 1.0, "rewards/chosen": -0.19562843441963196, "rewards/margins": 0.27711692452430725, "rewards/rejected": -0.4727453589439392, "step": 3892 }, { "epoch": 10.65845311430527, "grad_norm": 3.0775275230407715, "learning_rate": 4.667123287671232e-07, "log_odds_chosen": 3.7079761028289795, "log_odds_ratio": -0.14382392168045044, "logits/chosen": 0.9834247827529907, "logits/rejected": 0.9427142143249512, "logps/chosen": -1.5243198871612549, "logps/rejected": -5.012913227081299, "loss": 0.5662, "nll_loss": 0.5518126487731934, "rewards/accuracies": 1.0, "rewards/chosen": -0.15243199467658997, "rewards/margins": 0.34885936975479126, "rewards/rejected": -0.5012913942337036, "step": 3893 }, { "epoch": 10.661190965092402, "grad_norm": 4.134716510772705, "learning_rate": 4.6657534246575343e-07, "log_odds_chosen": 1.111627221107483, "log_odds_ratio": -0.4391556680202484, "logits/chosen": 0.9151629209518433, "logits/rejected": 0.9010814428329468, "logps/chosen": -2.148419141769409, "logps/rejected": -3.0822319984436035, "loss": 0.6196, "nll_loss": 0.5757244825363159, "rewards/accuracies": 0.75, "rewards/chosen": -0.21484190225601196, "rewards/margins": 0.09338130056858063, "rewards/rejected": -0.3082231879234314, "step": 3894 }, { "epoch": 10.663928815879535, "grad_norm": 8.712242126464844, "learning_rate": 4.6643835616438353e-07, "log_odds_chosen": 1.1444625854492188, "log_odds_ratio": -0.6311789155006409, "logits/chosen": 0.9358131885528564, "logits/rejected": 0.8358498811721802, "logps/chosen": -2.8103652000427246, "logps/rejected": -3.8347747325897217, "loss": 0.7752, "nll_loss": 0.7120811939239502, "rewards/accuracies": 0.75, "rewards/chosen": -0.2810365557670593, "rewards/margins": 0.10244093090295792, "rewards/rejected": -0.38347747921943665, "step": 3895 }, { "epoch": 10.666666666666666, "grad_norm": 5.137002944946289, "learning_rate": 4.663013698630137e-07, "log_odds_chosen": 1.2153359651565552, "log_odds_ratio": -0.45019277930259705, "logits/chosen": 1.1426403522491455, "logits/rejected": 1.187199592590332, "logps/chosen": -2.01694917678833, "logps/rejected": -3.150122880935669, "loss": 0.5784, "nll_loss": 0.5333418250083923, "rewards/accuracies": 0.75, "rewards/chosen": -0.20169487595558167, "rewards/margins": 0.11331740766763687, "rewards/rejected": -0.3150123059749603, "step": 3896 }, { "epoch": 10.669404517453799, "grad_norm": 3.4182019233703613, "learning_rate": 4.6616438356164383e-07, "log_odds_chosen": 1.8555147647857666, "log_odds_ratio": -0.24007654190063477, "logits/chosen": 0.9855237007141113, "logits/rejected": 0.911422610282898, "logps/chosen": -1.4132683277130127, "logps/rejected": -3.025784492492676, "loss": 0.5457, "nll_loss": 0.521649956703186, "rewards/accuracies": 1.0, "rewards/chosen": -0.14132682979106903, "rewards/margins": 0.16125160455703735, "rewards/rejected": -0.3025784492492676, "step": 3897 }, { "epoch": 10.67214236824093, "grad_norm": 3.99520206451416, "learning_rate": 4.6602739726027393e-07, "log_odds_chosen": 1.7414369583129883, "log_odds_ratio": -0.3138900101184845, "logits/chosen": 1.000348687171936, "logits/rejected": 0.9824166297912598, "logps/chosen": -2.3200461864471436, "logps/rejected": -3.8760950565338135, "loss": 0.5497, "nll_loss": 0.5182790160179138, "rewards/accuracies": 1.0, "rewards/chosen": -0.23200464248657227, "rewards/margins": 0.15560486912727356, "rewards/rejected": -0.3876095116138458, "step": 3898 }, { "epoch": 10.674880219028063, "grad_norm": 4.376080513000488, "learning_rate": 4.658904109589041e-07, "log_odds_chosen": 1.011435866355896, "log_odds_ratio": -0.3669590651988983, "logits/chosen": 0.803817629814148, "logits/rejected": 0.7666928768157959, "logps/chosen": -2.162503242492676, "logps/rejected": -3.0349693298339844, "loss": 0.5256, "nll_loss": 0.48889297246932983, "rewards/accuracies": 0.875, "rewards/chosen": -0.21625034511089325, "rewards/margins": 0.0872466117143631, "rewards/rejected": -0.30349695682525635, "step": 3899 }, { "epoch": 10.677618069815194, "grad_norm": 3.4733283519744873, "learning_rate": 4.657534246575342e-07, "log_odds_chosen": 2.5947976112365723, "log_odds_ratio": -0.271618515253067, "logits/chosen": 0.7800418734550476, "logits/rejected": 0.7794206142425537, "logps/chosen": -2.217508554458618, "logps/rejected": -4.677915096282959, "loss": 0.5321, "nll_loss": 0.5049726366996765, "rewards/accuracies": 0.75, "rewards/chosen": -0.221750870347023, "rewards/margins": 0.24604065716266632, "rewards/rejected": -0.46779149770736694, "step": 3900 }, { "epoch": 10.680355920602327, "grad_norm": 3.9355406761169434, "learning_rate": 4.656164383561644e-07, "log_odds_chosen": 2.874260187149048, "log_odds_ratio": -0.16572453081607819, "logits/chosen": 1.3479249477386475, "logits/rejected": 1.4042954444885254, "logps/chosen": -2.2662103176116943, "logps/rejected": -5.003016948699951, "loss": 0.5429, "nll_loss": 0.5263573527336121, "rewards/accuracies": 1.0, "rewards/chosen": -0.22662106156349182, "rewards/margins": 0.2736806571483612, "rewards/rejected": -0.5003016591072083, "step": 3901 }, { "epoch": 10.683093771389458, "grad_norm": 3.0871121883392334, "learning_rate": 4.654794520547945e-07, "log_odds_chosen": 5.365571975708008, "log_odds_ratio": -0.04103678837418556, "logits/chosen": 1.1597626209259033, "logits/rejected": 1.2172437906265259, "logps/chosen": -2.475322723388672, "logps/rejected": -7.709050178527832, "loss": 0.7041, "nll_loss": 0.7000115513801575, "rewards/accuracies": 1.0, "rewards/chosen": -0.24753230810165405, "rewards/margins": 0.5233727693557739, "rewards/rejected": -0.7709051370620728, "step": 3902 }, { "epoch": 10.685831622176591, "grad_norm": 3.5190017223358154, "learning_rate": 4.6534246575342464e-07, "log_odds_chosen": 1.8891807794570923, "log_odds_ratio": -0.18578536808490753, "logits/chosen": 1.0016086101531982, "logits/rejected": 1.0032954216003418, "logps/chosen": -1.886599063873291, "logps/rejected": -3.5878970623016357, "loss": 0.5535, "nll_loss": 0.5349152088165283, "rewards/accuracies": 1.0, "rewards/chosen": -0.1886599212884903, "rewards/margins": 0.17012980580329895, "rewards/rejected": -0.35878971219062805, "step": 3903 }, { "epoch": 10.688569472963724, "grad_norm": 3.723250389099121, "learning_rate": 4.652054794520548e-07, "log_odds_chosen": 1.1552510261535645, "log_odds_ratio": -0.3967664837837219, "logits/chosen": 1.0528531074523926, "logits/rejected": 0.9791324138641357, "logps/chosen": -2.705080509185791, "logps/rejected": -3.7808096408843994, "loss": 0.6687, "nll_loss": 0.6289951801300049, "rewards/accuracies": 1.0, "rewards/chosen": -0.2705080509185791, "rewards/margins": 0.10757289826869965, "rewards/rejected": -0.37808096408843994, "step": 3904 }, { "epoch": 10.691307323750856, "grad_norm": 3.6542534828186035, "learning_rate": 4.650684931506849e-07, "log_odds_chosen": 4.68881893157959, "log_odds_ratio": -0.09449447691440582, "logits/chosen": 1.244150161743164, "logits/rejected": 1.216435194015503, "logps/chosen": -2.323944568634033, "logps/rejected": -6.8376545906066895, "loss": 0.6127, "nll_loss": 0.6032655835151672, "rewards/accuracies": 1.0, "rewards/chosen": -0.23239445686340332, "rewards/margins": 0.4513710141181946, "rewards/rejected": -0.6837654709815979, "step": 3905 }, { "epoch": 10.694045174537987, "grad_norm": 3.545504570007324, "learning_rate": 4.6493150684931504e-07, "log_odds_chosen": 2.5279436111450195, "log_odds_ratio": -0.20601822435855865, "logits/chosen": 0.9946749806404114, "logits/rejected": 0.8945231437683105, "logps/chosen": -1.386960744857788, "logps/rejected": -3.7053394317626953, "loss": 0.5121, "nll_loss": 0.49152982234954834, "rewards/accuracies": 1.0, "rewards/chosen": -0.1386960744857788, "rewards/margins": 0.23183786869049072, "rewards/rejected": -0.37053394317626953, "step": 3906 }, { "epoch": 10.69678302532512, "grad_norm": 3.0593652725219727, "learning_rate": 4.6479452054794514e-07, "log_odds_chosen": 3.296070098876953, "log_odds_ratio": -0.13843059539794922, "logits/chosen": 1.0595526695251465, "logits/rejected": 1.038817048072815, "logps/chosen": -1.869495153427124, "logps/rejected": -4.999630451202393, "loss": 0.6589, "nll_loss": 0.6450642347335815, "rewards/accuracies": 1.0, "rewards/chosen": -0.18694952130317688, "rewards/margins": 0.3130135238170624, "rewards/rejected": -0.49996304512023926, "step": 3907 }, { "epoch": 10.699520876112253, "grad_norm": 3.2430360317230225, "learning_rate": 4.6465753424657535e-07, "log_odds_chosen": 4.1460862159729, "log_odds_ratio": -0.19096817076206207, "logits/chosen": 1.345215082168579, "logits/rejected": 1.3520162105560303, "logps/chosen": -2.5212531089782715, "logps/rejected": -6.520153522491455, "loss": 0.6096, "nll_loss": 0.5905444025993347, "rewards/accuracies": 0.875, "rewards/chosen": -0.2521253228187561, "rewards/margins": 0.3998900055885315, "rewards/rejected": -0.6520153284072876, "step": 3908 }, { "epoch": 10.702258726899384, "grad_norm": 3.009746551513672, "learning_rate": 4.6452054794520545e-07, "log_odds_chosen": 3.8978538513183594, "log_odds_ratio": -0.08542089909315109, "logits/chosen": 1.1381809711456299, "logits/rejected": 1.0939475297927856, "logps/chosen": -1.83638334274292, "logps/rejected": -5.496552467346191, "loss": 0.6074, "nll_loss": 0.5988954305648804, "rewards/accuracies": 1.0, "rewards/chosen": -0.18363834917545319, "rewards/margins": 0.3660169243812561, "rewards/rejected": -0.5496552586555481, "step": 3909 }, { "epoch": 10.704996577686517, "grad_norm": 6.734983444213867, "learning_rate": 4.643835616438356e-07, "log_odds_chosen": 2.190412998199463, "log_odds_ratio": -0.34994977712631226, "logits/chosen": 1.0379403829574585, "logits/rejected": 1.06309974193573, "logps/chosen": -3.331979751586914, "logps/rejected": -5.343584060668945, "loss": 0.7893, "nll_loss": 0.7542557716369629, "rewards/accuracies": 0.875, "rewards/chosen": -0.33319801092147827, "rewards/margins": 0.20116040110588074, "rewards/rejected": -0.5343583822250366, "step": 3910 }, { "epoch": 10.707734428473648, "grad_norm": 4.761162757873535, "learning_rate": 4.6424657534246575e-07, "log_odds_chosen": 1.432138204574585, "log_odds_ratio": -0.5090908408164978, "logits/chosen": 0.9509404301643372, "logits/rejected": 0.9417974352836609, "logps/chosen": -2.2588868141174316, "logps/rejected": -3.435026168823242, "loss": 0.5789, "nll_loss": 0.5279868245124817, "rewards/accuracies": 0.75, "rewards/chosen": -0.2258886694908142, "rewards/margins": 0.11761394888162613, "rewards/rejected": -0.34350261092185974, "step": 3911 }, { "epoch": 10.710472279260781, "grad_norm": 3.4117588996887207, "learning_rate": 4.6410958904109585e-07, "log_odds_chosen": 2.7156336307525635, "log_odds_ratio": -0.2074560523033142, "logits/chosen": 1.2102806568145752, "logits/rejected": 1.2485134601593018, "logps/chosen": -2.284419536590576, "logps/rejected": -4.877060413360596, "loss": 0.5959, "nll_loss": 0.5751883387565613, "rewards/accuracies": 0.875, "rewards/chosen": -0.22844195365905762, "rewards/margins": 0.25926414132118225, "rewards/rejected": -0.48770609498023987, "step": 3912 }, { "epoch": 10.713210130047912, "grad_norm": 4.999019622802734, "learning_rate": 4.63972602739726e-07, "log_odds_chosen": 2.558053970336914, "log_odds_ratio": -0.3676520586013794, "logits/chosen": 1.2049736976623535, "logits/rejected": 1.2334873676300049, "logps/chosen": -2.5216660499572754, "logps/rejected": -4.896568775177002, "loss": 0.6285, "nll_loss": 0.5917020440101624, "rewards/accuracies": 0.875, "rewards/chosen": -0.25216662883758545, "rewards/margins": 0.23749026656150818, "rewards/rejected": -0.48965686559677124, "step": 3913 }, { "epoch": 10.715947980835045, "grad_norm": 3.603222131729126, "learning_rate": 4.6383561643835616e-07, "log_odds_chosen": 3.00783634185791, "log_odds_ratio": -0.2305912971496582, "logits/chosen": 1.1563891172409058, "logits/rejected": 1.146329402923584, "logps/chosen": -2.1297833919525146, "logps/rejected": -5.045077323913574, "loss": 0.6212, "nll_loss": 0.598153293132782, "rewards/accuracies": 0.875, "rewards/chosen": -0.21297836303710938, "rewards/margins": 0.2915293872356415, "rewards/rejected": -0.5045077800750732, "step": 3914 }, { "epoch": 10.718685831622176, "grad_norm": 4.092331886291504, "learning_rate": 4.636986301369863e-07, "log_odds_chosen": 3.881065607070923, "log_odds_ratio": -0.17785733938217163, "logits/chosen": 1.171614408493042, "logits/rejected": 1.1848310232162476, "logps/chosen": -2.4426727294921875, "logps/rejected": -6.167014122009277, "loss": 0.5689, "nll_loss": 0.5511537194252014, "rewards/accuracies": 0.875, "rewards/chosen": -0.2442672848701477, "rewards/margins": 0.372434139251709, "rewards/rejected": -0.6167013645172119, "step": 3915 }, { "epoch": 10.72142368240931, "grad_norm": 3.716240406036377, "learning_rate": 4.635616438356164e-07, "log_odds_chosen": 0.7927056550979614, "log_odds_ratio": -0.43888404965400696, "logits/chosen": 1.2052582502365112, "logits/rejected": 1.172413945198059, "logps/chosen": -1.614822268486023, "logps/rejected": -2.3120017051696777, "loss": 0.4999, "nll_loss": 0.45604220032691956, "rewards/accuracies": 0.875, "rewards/chosen": -0.16148221492767334, "rewards/margins": 0.06971795111894608, "rewards/rejected": -0.2312001734972, "step": 3916 }, { "epoch": 10.72416153319644, "grad_norm": 3.6640782356262207, "learning_rate": 4.6342465753424656e-07, "log_odds_chosen": 3.3635239601135254, "log_odds_ratio": -0.15057282149791718, "logits/chosen": 0.9464015960693359, "logits/rejected": 0.8429402112960815, "logps/chosen": -1.5702402591705322, "logps/rejected": -4.714555263519287, "loss": 0.5641, "nll_loss": 0.5490774512290955, "rewards/accuracies": 1.0, "rewards/chosen": -0.15702404081821442, "rewards/margins": 0.31443148851394653, "rewards/rejected": -0.47145554423332214, "step": 3917 }, { "epoch": 10.726899383983573, "grad_norm": 3.1241109371185303, "learning_rate": 4.632876712328767e-07, "log_odds_chosen": 2.4593472480773926, "log_odds_ratio": -0.15779930353164673, "logits/chosen": 0.7922382354736328, "logits/rejected": 0.7454895973205566, "logps/chosen": -1.7507803440093994, "logps/rejected": -3.988933563232422, "loss": 0.5458, "nll_loss": 0.5300540924072266, "rewards/accuracies": 1.0, "rewards/chosen": -0.17507803440093994, "rewards/margins": 0.22381532192230225, "rewards/rejected": -0.3988933563232422, "step": 3918 }, { "epoch": 10.729637234770705, "grad_norm": 3.6937875747680664, "learning_rate": 4.631506849315068e-07, "log_odds_chosen": 2.9677910804748535, "log_odds_ratio": -0.3031941056251526, "logits/chosen": 0.9662569761276245, "logits/rejected": 0.9879761934280396, "logps/chosen": -1.9088492393493652, "logps/rejected": -4.761011123657227, "loss": 0.6345, "nll_loss": 0.6042131185531616, "rewards/accuracies": 0.75, "rewards/chosen": -0.19088493287563324, "rewards/margins": 0.28521621227264404, "rewards/rejected": -0.4761011302471161, "step": 3919 }, { "epoch": 10.732375085557837, "grad_norm": 3.616001844406128, "learning_rate": 4.6301369863013696e-07, "log_odds_chosen": 2.0027265548706055, "log_odds_ratio": -0.23441056907176971, "logits/chosen": 0.8015000820159912, "logits/rejected": 0.77443528175354, "logps/chosen": -2.0620312690734863, "logps/rejected": -3.8968775272369385, "loss": 0.5843, "nll_loss": 0.5608172416687012, "rewards/accuracies": 1.0, "rewards/chosen": -0.2062031328678131, "rewards/margins": 0.18348461389541626, "rewards/rejected": -0.38968777656555176, "step": 3920 }, { "epoch": 10.735112936344969, "grad_norm": 4.5632710456848145, "learning_rate": 4.628767123287671e-07, "log_odds_chosen": 1.3506838083267212, "log_odds_ratio": -0.3325984477996826, "logits/chosen": 0.8296489715576172, "logits/rejected": 0.7991304993629456, "logps/chosen": -2.0823895931243896, "logps/rejected": -3.3028483390808105, "loss": 0.5361, "nll_loss": 0.5028399229049683, "rewards/accuracies": 0.875, "rewards/chosen": -0.20823895931243896, "rewards/margins": 0.12204591184854507, "rewards/rejected": -0.33028483390808105, "step": 3921 }, { "epoch": 10.737850787132102, "grad_norm": 3.7862393856048584, "learning_rate": 4.6273972602739727e-07, "log_odds_chosen": 1.9728552103042603, "log_odds_ratio": -0.20031797885894775, "logits/chosen": 0.9903281927108765, "logits/rejected": 1.0924957990646362, "logps/chosen": -2.1769258975982666, "logps/rejected": -4.057863235473633, "loss": 0.6008, "nll_loss": 0.5807815790176392, "rewards/accuracies": 1.0, "rewards/chosen": -0.21769261360168457, "rewards/margins": 0.18809372186660767, "rewards/rejected": -0.40578633546829224, "step": 3922 }, { "epoch": 10.740588637919233, "grad_norm": 3.538381576538086, "learning_rate": 4.6260273972602737e-07, "log_odds_chosen": 2.4885451793670654, "log_odds_ratio": -0.18328793346881866, "logits/chosen": 1.0836788415908813, "logits/rejected": 1.1248846054077148, "logps/chosen": -1.7727789878845215, "logps/rejected": -4.017601490020752, "loss": 0.5955, "nll_loss": 0.5771263837814331, "rewards/accuracies": 1.0, "rewards/chosen": -0.17727789282798767, "rewards/margins": 0.22448226809501648, "rewards/rejected": -0.40176019072532654, "step": 3923 }, { "epoch": 10.743326488706366, "grad_norm": 8.079755783081055, "learning_rate": 4.6246575342465746e-07, "log_odds_chosen": 2.0900416374206543, "log_odds_ratio": -0.3467462658882141, "logits/chosen": 0.9627472162246704, "logits/rejected": 0.986114501953125, "logps/chosen": -2.383920431137085, "logps/rejected": -4.325512886047363, "loss": 0.5582, "nll_loss": 0.5235571265220642, "rewards/accuracies": 0.75, "rewards/chosen": -0.23839205503463745, "rewards/margins": 0.19415923953056335, "rewards/rejected": -0.4325512647628784, "step": 3924 }, { "epoch": 10.746064339493497, "grad_norm": 3.4986870288848877, "learning_rate": 4.6232876712328767e-07, "log_odds_chosen": 3.882225513458252, "log_odds_ratio": -0.10972478240728378, "logits/chosen": 1.2287486791610718, "logits/rejected": 1.263026237487793, "logps/chosen": -2.275111198425293, "logps/rejected": -6.023187160491943, "loss": 0.6181, "nll_loss": 0.6071032881736755, "rewards/accuracies": 1.0, "rewards/chosen": -0.22751109302043915, "rewards/margins": 0.37480759620666504, "rewards/rejected": -0.6023187041282654, "step": 3925 }, { "epoch": 10.74880219028063, "grad_norm": 3.2559597492218018, "learning_rate": 4.6219178082191777e-07, "log_odds_chosen": 3.0602893829345703, "log_odds_ratio": -0.2263791412115097, "logits/chosen": 0.9791152477264404, "logits/rejected": 0.9869347810745239, "logps/chosen": -1.831803321838379, "logps/rejected": -4.74758243560791, "loss": 0.6034, "nll_loss": 0.5807536840438843, "rewards/accuracies": 0.875, "rewards/chosen": -0.1831803321838379, "rewards/margins": 0.29157787561416626, "rewards/rejected": -0.4747582674026489, "step": 3926 }, { "epoch": 10.751540041067761, "grad_norm": 3.3437838554382324, "learning_rate": 4.620547945205479e-07, "log_odds_chosen": 2.9184072017669678, "log_odds_ratio": -0.16012711822986603, "logits/chosen": 0.7771224975585938, "logits/rejected": 0.7054274082183838, "logps/chosen": -1.424663782119751, "logps/rejected": -4.085042953491211, "loss": 0.4524, "nll_loss": 0.4364094138145447, "rewards/accuracies": 1.0, "rewards/chosen": -0.14246639609336853, "rewards/margins": 0.26603788137435913, "rewards/rejected": -0.40850430727005005, "step": 3927 }, { "epoch": 10.754277891854894, "grad_norm": 3.3258683681488037, "learning_rate": 4.6191780821917807e-07, "log_odds_chosen": 1.8629770278930664, "log_odds_ratio": -0.3293613791465759, "logits/chosen": 1.118856430053711, "logits/rejected": 1.0934562683105469, "logps/chosen": -1.7799112796783447, "logps/rejected": -3.481292724609375, "loss": 0.5709, "nll_loss": 0.5379799604415894, "rewards/accuracies": 0.875, "rewards/chosen": -0.1779911369085312, "rewards/margins": 0.1701381355524063, "rewards/rejected": -0.3481292724609375, "step": 3928 }, { "epoch": 10.757015742642025, "grad_norm": 9.216290473937988, "learning_rate": 4.617808219178082e-07, "log_odds_chosen": 4.544692039489746, "log_odds_ratio": -0.28340816497802734, "logits/chosen": 1.2306840419769287, "logits/rejected": 1.1475497484207153, "logps/chosen": -2.4890832901000977, "logps/rejected": -6.886734962463379, "loss": 0.6833, "nll_loss": 0.6549855470657349, "rewards/accuracies": 0.875, "rewards/chosen": -0.24890832602977753, "rewards/margins": 0.43976518511772156, "rewards/rejected": -0.6886734962463379, "step": 3929 }, { "epoch": 10.759753593429158, "grad_norm": 3.604463815689087, "learning_rate": 4.616438356164383e-07, "log_odds_chosen": 3.144188404083252, "log_odds_ratio": -0.23771221935749054, "logits/chosen": 0.9689376950263977, "logits/rejected": 0.9546731114387512, "logps/chosen": -1.8011970520019531, "logps/rejected": -4.74711799621582, "loss": 0.5918, "nll_loss": 0.5680122971534729, "rewards/accuracies": 0.875, "rewards/chosen": -0.18011970818042755, "rewards/margins": 0.29459208250045776, "rewards/rejected": -0.4747118055820465, "step": 3930 }, { "epoch": 10.762491444216291, "grad_norm": 7.225907802581787, "learning_rate": 4.615068493150684e-07, "log_odds_chosen": 1.184371829032898, "log_odds_ratio": -0.5617542266845703, "logits/chosen": 0.9683583378791809, "logits/rejected": 0.9301321506500244, "logps/chosen": -2.961637020111084, "logps/rejected": -4.059797286987305, "loss": 0.7804, "nll_loss": 0.7242512702941895, "rewards/accuracies": 0.75, "rewards/chosen": -0.2961636781692505, "rewards/margins": 0.10981601476669312, "rewards/rejected": -0.405979722738266, "step": 3931 }, { "epoch": 10.765229295003422, "grad_norm": 9.343658447265625, "learning_rate": 4.6136986301369863e-07, "log_odds_chosen": 1.4492961168289185, "log_odds_ratio": -0.4945157766342163, "logits/chosen": 1.0322693586349487, "logits/rejected": 1.0659583806991577, "logps/chosen": -2.6733415126800537, "logps/rejected": -4.008248805999756, "loss": 0.5776, "nll_loss": 0.5281248092651367, "rewards/accuracies": 0.625, "rewards/chosen": -0.2673341631889343, "rewards/margins": 0.1334906965494156, "rewards/rejected": -0.4008249044418335, "step": 3932 }, { "epoch": 10.767967145790553, "grad_norm": 3.2535815238952637, "learning_rate": 4.6123287671232873e-07, "log_odds_chosen": 2.434094190597534, "log_odds_ratio": -0.23013663291931152, "logits/chosen": 0.930435836315155, "logits/rejected": 0.860941469669342, "logps/chosen": -1.591835379600525, "logps/rejected": -3.846442937850952, "loss": 0.5849, "nll_loss": 0.5619176626205444, "rewards/accuracies": 1.0, "rewards/chosen": -0.159183531999588, "rewards/margins": 0.22546076774597168, "rewards/rejected": -0.3846442997455597, "step": 3933 }, { "epoch": 10.770704996577686, "grad_norm": 3.4370055198669434, "learning_rate": 4.610958904109589e-07, "log_odds_chosen": 2.885025978088379, "log_odds_ratio": -0.22589275240898132, "logits/chosen": 1.136179804801941, "logits/rejected": 1.1624871492385864, "logps/chosen": -2.1582510471343994, "logps/rejected": -4.895569801330566, "loss": 0.5591, "nll_loss": 0.5365029573440552, "rewards/accuracies": 0.875, "rewards/chosen": -0.21582511067390442, "rewards/margins": 0.2737318277359009, "rewards/rejected": -0.4895569682121277, "step": 3934 }, { "epoch": 10.77344284736482, "grad_norm": 3.4514832496643066, "learning_rate": 4.6095890410958903e-07, "log_odds_chosen": 3.326450824737549, "log_odds_ratio": -0.15834948420524597, "logits/chosen": 0.8803644180297852, "logits/rejected": 0.8359740972518921, "logps/chosen": -1.767695426940918, "logps/rejected": -4.880387783050537, "loss": 0.5431, "nll_loss": 0.5272907018661499, "rewards/accuracies": 1.0, "rewards/chosen": -0.17676952481269836, "rewards/margins": 0.31126922369003296, "rewards/rejected": -0.4880387783050537, "step": 3935 }, { "epoch": 10.77618069815195, "grad_norm": 3.7559401988983154, "learning_rate": 4.608219178082192e-07, "log_odds_chosen": 1.99455726146698, "log_odds_ratio": -0.258008748292923, "logits/chosen": 1.157609224319458, "logits/rejected": 1.1418681144714355, "logps/chosen": -1.6847217082977295, "logps/rejected": -3.5235848426818848, "loss": 0.4906, "nll_loss": 0.4648095965385437, "rewards/accuracies": 0.875, "rewards/chosen": -0.16847217082977295, "rewards/margins": 0.18388631939888, "rewards/rejected": -0.35235846042633057, "step": 3936 }, { "epoch": 10.778918548939084, "grad_norm": 3.5596745014190674, "learning_rate": 4.606849315068493e-07, "log_odds_chosen": 3.97794771194458, "log_odds_ratio": -0.20972995460033417, "logits/chosen": 0.9588979482650757, "logits/rejected": 0.9313948154449463, "logps/chosen": -1.5427355766296387, "logps/rejected": -5.265110969543457, "loss": 0.5484, "nll_loss": 0.5274225473403931, "rewards/accuracies": 1.0, "rewards/chosen": -0.15427358448505402, "rewards/margins": 0.37223753333091736, "rewards/rejected": -0.5265110731124878, "step": 3937 }, { "epoch": 10.781656399726215, "grad_norm": 3.8848354816436768, "learning_rate": 4.605479452054794e-07, "log_odds_chosen": 1.0224082469940186, "log_odds_ratio": -0.3788919448852539, "logits/chosen": 0.7223018407821655, "logits/rejected": 0.7025159597396851, "logps/chosen": -2.1571192741394043, "logps/rejected": -3.1122806072235107, "loss": 0.6845, "nll_loss": 0.646624743938446, "rewards/accuracies": 1.0, "rewards/chosen": -0.21571192145347595, "rewards/margins": 0.09551617503166199, "rewards/rejected": -0.31122809648513794, "step": 3938 }, { "epoch": 10.784394250513348, "grad_norm": 3.455427408218384, "learning_rate": 4.604109589041096e-07, "log_odds_chosen": 2.3319966793060303, "log_odds_ratio": -0.33877870440483093, "logits/chosen": 0.977662205696106, "logits/rejected": 0.9456003904342651, "logps/chosen": -1.5495296716690063, "logps/rejected": -3.6989850997924805, "loss": 0.5377, "nll_loss": 0.5037775039672852, "rewards/accuracies": 0.875, "rewards/chosen": -0.15495295822620392, "rewards/margins": 0.21494555473327637, "rewards/rejected": -0.3698984980583191, "step": 3939 }, { "epoch": 10.787132101300479, "grad_norm": 3.4318671226501465, "learning_rate": 4.602739726027397e-07, "log_odds_chosen": 3.093930959701538, "log_odds_ratio": -0.19413705170154572, "logits/chosen": 0.8461108207702637, "logits/rejected": 0.8237934112548828, "logps/chosen": -2.128612518310547, "logps/rejected": -5.094326972961426, "loss": 0.6042, "nll_loss": 0.5847766995429993, "rewards/accuracies": 1.0, "rewards/chosen": -0.21286123991012573, "rewards/margins": 0.2965714931488037, "rewards/rejected": -0.5094327330589294, "step": 3940 }, { "epoch": 10.789869952087612, "grad_norm": 3.731914758682251, "learning_rate": 4.6013698630136984e-07, "log_odds_chosen": 3.2445056438446045, "log_odds_ratio": -0.20698282122612, "logits/chosen": 0.8929270505905151, "logits/rejected": 0.8640493154525757, "logps/chosen": -1.5905656814575195, "logps/rejected": -4.644922256469727, "loss": 0.4812, "nll_loss": 0.460501492023468, "rewards/accuracies": 1.0, "rewards/chosen": -0.15905657410621643, "rewards/margins": 0.3054356575012207, "rewards/rejected": -0.46449220180511475, "step": 3941 }, { "epoch": 10.792607802874743, "grad_norm": 3.460102081298828, "learning_rate": 4.6e-07, "log_odds_chosen": 1.6608633995056152, "log_odds_ratio": -0.2636851668357849, "logits/chosen": 0.8593776226043701, "logits/rejected": 0.8440902233123779, "logps/chosen": -1.8412224054336548, "logps/rejected": -3.3089499473571777, "loss": 0.5498, "nll_loss": 0.5234211683273315, "rewards/accuracies": 0.875, "rewards/chosen": -0.1841222643852234, "rewards/margins": 0.14677274227142334, "rewards/rejected": -0.33089500665664673, "step": 3942 }, { "epoch": 10.795345653661876, "grad_norm": 4.423820972442627, "learning_rate": 4.5986301369863014e-07, "log_odds_chosen": 1.6164952516555786, "log_odds_ratio": -0.3356932997703552, "logits/chosen": 0.9581938982009888, "logits/rejected": 0.9491094350814819, "logps/chosen": -2.5859451293945312, "logps/rejected": -4.118295192718506, "loss": 0.6078, "nll_loss": 0.5742219090461731, "rewards/accuracies": 0.875, "rewards/chosen": -0.2585945129394531, "rewards/margins": 0.15323500335216522, "rewards/rejected": -0.41182953119277954, "step": 3943 }, { "epoch": 10.798083504449007, "grad_norm": 3.473574638366699, "learning_rate": 4.5972602739726024e-07, "log_odds_chosen": 1.7985066175460815, "log_odds_ratio": -0.30295562744140625, "logits/chosen": 0.8935789465904236, "logits/rejected": 0.888014554977417, "logps/chosen": -1.6671562194824219, "logps/rejected": -3.3234188556671143, "loss": 0.5213, "nll_loss": 0.4910343885421753, "rewards/accuracies": 0.875, "rewards/chosen": -0.16671563684940338, "rewards/margins": 0.16562625765800476, "rewards/rejected": -0.33234190940856934, "step": 3944 }, { "epoch": 10.80082135523614, "grad_norm": 3.806788206100464, "learning_rate": 4.595890410958904e-07, "log_odds_chosen": 5.282708644866943, "log_odds_ratio": -0.1182592436671257, "logits/chosen": 1.215641975402832, "logits/rejected": 1.2965716123580933, "logps/chosen": -2.0970845222473145, "logps/rejected": -7.147249221801758, "loss": 0.6543, "nll_loss": 0.6425216197967529, "rewards/accuracies": 0.875, "rewards/chosen": -0.20970845222473145, "rewards/margins": 0.5050165057182312, "rewards/rejected": -0.7147248983383179, "step": 3945 }, { "epoch": 10.803559206023271, "grad_norm": 3.636881113052368, "learning_rate": 4.5945205479452055e-07, "log_odds_chosen": 2.3745357990264893, "log_odds_ratio": -0.28523439168930054, "logits/chosen": 0.854210615158081, "logits/rejected": 0.7910939455032349, "logps/chosen": -1.8701748847961426, "logps/rejected": -4.037162780761719, "loss": 0.6084, "nll_loss": 0.5799094438552856, "rewards/accuracies": 0.875, "rewards/chosen": -0.18701748549938202, "rewards/margins": 0.2166987657546997, "rewards/rejected": -0.4037162661552429, "step": 3946 }, { "epoch": 10.806297056810404, "grad_norm": 6.377212047576904, "learning_rate": 4.5931506849315064e-07, "log_odds_chosen": 3.799077033996582, "log_odds_ratio": -0.1603265404701233, "logits/chosen": 1.0970522165298462, "logits/rejected": 1.075751543045044, "logps/chosen": -1.9199943542480469, "logps/rejected": -5.539267063140869, "loss": 0.6198, "nll_loss": 0.6038081645965576, "rewards/accuracies": 1.0, "rewards/chosen": -0.1919994354248047, "rewards/margins": 0.3619272708892822, "rewards/rejected": -0.5539267063140869, "step": 3947 }, { "epoch": 10.809034907597535, "grad_norm": 3.37448787689209, "learning_rate": 4.591780821917808e-07, "log_odds_chosen": 1.363154649734497, "log_odds_ratio": -0.30961158871650696, "logits/chosen": 0.845634400844574, "logits/rejected": 0.8054023385047913, "logps/chosen": -2.0210514068603516, "logps/rejected": -3.2655088901519775, "loss": 0.6286, "nll_loss": 0.5976353287696838, "rewards/accuracies": 0.875, "rewards/chosen": -0.20210513472557068, "rewards/margins": 0.12444575130939484, "rewards/rejected": -0.3265508711338043, "step": 3948 }, { "epoch": 10.811772758384668, "grad_norm": 3.5576486587524414, "learning_rate": 4.5904109589041095e-07, "log_odds_chosen": 2.1643669605255127, "log_odds_ratio": -0.22514770925045013, "logits/chosen": 1.0361422300338745, "logits/rejected": 0.9769780039787292, "logps/chosen": -1.5994609594345093, "logps/rejected": -3.5110621452331543, "loss": 0.4694, "nll_loss": 0.44686973094940186, "rewards/accuracies": 1.0, "rewards/chosen": -0.15994609892368317, "rewards/margins": 0.19116011261940002, "rewards/rejected": -0.3511062264442444, "step": 3949 }, { "epoch": 10.8145106091718, "grad_norm": 3.572274923324585, "learning_rate": 4.589041095890411e-07, "log_odds_chosen": 1.5095471143722534, "log_odds_ratio": -0.3028812110424042, "logits/chosen": 1.042176604270935, "logits/rejected": 1.008986234664917, "logps/chosen": -1.5910297632217407, "logps/rejected": -2.91937255859375, "loss": 0.4956, "nll_loss": 0.46526631712913513, "rewards/accuracies": 0.875, "rewards/chosen": -0.15910297632217407, "rewards/margins": 0.1328342705965042, "rewards/rejected": -0.2919372320175171, "step": 3950 }, { "epoch": 10.817248459958932, "grad_norm": 3.5099902153015137, "learning_rate": 4.587671232876712e-07, "log_odds_chosen": 3.0382235050201416, "log_odds_ratio": -0.2466355860233307, "logits/chosen": 0.8202883005142212, "logits/rejected": 0.8147229552268982, "logps/chosen": -1.7577875852584839, "logps/rejected": -4.636058807373047, "loss": 0.5839, "nll_loss": 0.5592271089553833, "rewards/accuracies": 1.0, "rewards/chosen": -0.17577876150608063, "rewards/margins": 0.28782713413238525, "rewards/rejected": -0.4636058807373047, "step": 3951 }, { "epoch": 10.819986310746064, "grad_norm": 3.460430145263672, "learning_rate": 4.5863013698630135e-07, "log_odds_chosen": 1.9580061435699463, "log_odds_ratio": -0.3125224709510803, "logits/chosen": 0.772892415523529, "logits/rejected": 0.759590208530426, "logps/chosen": -1.556429386138916, "logps/rejected": -3.3806471824645996, "loss": 0.5736, "nll_loss": 0.5423523187637329, "rewards/accuracies": 1.0, "rewards/chosen": -0.15564294159412384, "rewards/margins": 0.18242178857326508, "rewards/rejected": -0.33806470036506653, "step": 3952 }, { "epoch": 10.822724161533197, "grad_norm": 4.965414047241211, "learning_rate": 4.584931506849315e-07, "log_odds_chosen": 4.000895977020264, "log_odds_ratio": -0.32964247465133667, "logits/chosen": 0.8691802620887756, "logits/rejected": 0.8486786484718323, "logps/chosen": -1.7185698747634888, "logps/rejected": -5.634868621826172, "loss": 0.6093, "nll_loss": 0.576309859752655, "rewards/accuracies": 0.875, "rewards/chosen": -0.17185696959495544, "rewards/margins": 0.3916299045085907, "rewards/rejected": -0.5634868741035461, "step": 3953 }, { "epoch": 10.825462012320328, "grad_norm": 3.48270583152771, "learning_rate": 4.583561643835616e-07, "log_odds_chosen": 2.0992515087127686, "log_odds_ratio": -0.30426526069641113, "logits/chosen": 0.8475841879844666, "logits/rejected": 0.7368036508560181, "logps/chosen": -1.7803609371185303, "logps/rejected": -3.699249029159546, "loss": 0.6161, "nll_loss": 0.5856309533119202, "rewards/accuracies": 0.875, "rewards/chosen": -0.17803607881069183, "rewards/margins": 0.19188883900642395, "rewards/rejected": -0.3699249029159546, "step": 3954 }, { "epoch": 10.82819986310746, "grad_norm": 3.5084447860717773, "learning_rate": 4.5821917808219176e-07, "log_odds_chosen": 2.873775005340576, "log_odds_ratio": -0.1728593409061432, "logits/chosen": 1.0239953994750977, "logits/rejected": 1.031434178352356, "logps/chosen": -1.632469654083252, "logps/rejected": -4.266459941864014, "loss": 0.5506, "nll_loss": 0.5332972407341003, "rewards/accuracies": 1.0, "rewards/chosen": -0.16324695944786072, "rewards/margins": 0.26339900493621826, "rewards/rejected": -0.42664599418640137, "step": 3955 }, { "epoch": 10.830937713894592, "grad_norm": 3.5373482704162598, "learning_rate": 4.580821917808219e-07, "log_odds_chosen": 2.340909481048584, "log_odds_ratio": -0.23630721867084503, "logits/chosen": 0.9911712408065796, "logits/rejected": 1.0237091779708862, "logps/chosen": -2.028824806213379, "logps/rejected": -4.232760429382324, "loss": 0.5255, "nll_loss": 0.5018328428268433, "rewards/accuracies": 1.0, "rewards/chosen": -0.20288249850273132, "rewards/margins": 0.2203935831785202, "rewards/rejected": -0.4232760965824127, "step": 3956 }, { "epoch": 10.833675564681725, "grad_norm": 3.6518006324768066, "learning_rate": 4.5794520547945206e-07, "log_odds_chosen": 1.8684539794921875, "log_odds_ratio": -0.291655957698822, "logits/chosen": 0.8305066823959351, "logits/rejected": 0.8308968544006348, "logps/chosen": -2.361245632171631, "logps/rejected": -4.135655879974365, "loss": 0.6477, "nll_loss": 0.6185140013694763, "rewards/accuracies": 0.875, "rewards/chosen": -0.23612457513809204, "rewards/margins": 0.17744101583957672, "rewards/rejected": -0.41356560587882996, "step": 3957 }, { "epoch": 10.836413415468858, "grad_norm": 3.7613470554351807, "learning_rate": 4.5780821917808216e-07, "log_odds_chosen": 5.077747344970703, "log_odds_ratio": -0.08138683438301086, "logits/chosen": 1.1306990385055542, "logits/rejected": 1.158935546875, "logps/chosen": -1.6769404411315918, "logps/rejected": -6.5493340492248535, "loss": 0.599, "nll_loss": 0.5908323526382446, "rewards/accuracies": 1.0, "rewards/chosen": -0.16769403219223022, "rewards/margins": 0.48723936080932617, "rewards/rejected": -0.6549333930015564, "step": 3958 }, { "epoch": 10.839151266255989, "grad_norm": 3.6485636234283447, "learning_rate": 4.576712328767123e-07, "log_odds_chosen": 2.690182685852051, "log_odds_ratio": -0.3237409293651581, "logits/chosen": 0.6685085892677307, "logits/rejected": 0.6066617369651794, "logps/chosen": -1.5619194507598877, "logps/rejected": -4.068719863891602, "loss": 0.5601, "nll_loss": 0.527764081954956, "rewards/accuracies": 0.75, "rewards/chosen": -0.15619194507598877, "rewards/margins": 0.2506800591945648, "rewards/rejected": -0.4068720042705536, "step": 3959 }, { "epoch": 10.841889117043122, "grad_norm": 3.065420627593994, "learning_rate": 4.5753424657534246e-07, "log_odds_chosen": 3.700408458709717, "log_odds_ratio": -0.13728974759578705, "logits/chosen": 0.9032544493675232, "logits/rejected": 0.8209311962127686, "logps/chosen": -1.6705234050750732, "logps/rejected": -5.175429821014404, "loss": 0.662, "nll_loss": 0.6482948660850525, "rewards/accuracies": 1.0, "rewards/chosen": -0.16705234348773956, "rewards/margins": 0.35049065947532654, "rewards/rejected": -0.5175430178642273, "step": 3960 }, { "epoch": 10.844626967830253, "grad_norm": 3.7922351360321045, "learning_rate": 4.5739726027397256e-07, "log_odds_chosen": 3.817556858062744, "log_odds_ratio": -0.23664472997188568, "logits/chosen": 0.681585967540741, "logits/rejected": 0.6682917475700378, "logps/chosen": -1.8468384742736816, "logps/rejected": -5.571325302124023, "loss": 0.5876, "nll_loss": 0.5639768838882446, "rewards/accuracies": 0.875, "rewards/chosen": -0.18468384444713593, "rewards/margins": 0.3724486827850342, "rewards/rejected": -0.5571324825286865, "step": 3961 }, { "epoch": 10.847364818617386, "grad_norm": 3.5859744548797607, "learning_rate": 4.572602739726027e-07, "log_odds_chosen": 3.2748043537139893, "log_odds_ratio": -0.24889229238033295, "logits/chosen": 0.8852242827415466, "logits/rejected": 0.8387964963912964, "logps/chosen": -2.3668012619018555, "logps/rejected": -5.460748195648193, "loss": 0.5752, "nll_loss": 0.5502629280090332, "rewards/accuracies": 0.875, "rewards/chosen": -0.23668013513088226, "rewards/margins": 0.3093947172164917, "rewards/rejected": -0.5460748672485352, "step": 3962 }, { "epoch": 10.850102669404517, "grad_norm": 6.640139579772949, "learning_rate": 4.5712328767123287e-07, "log_odds_chosen": 2.079758644104004, "log_odds_ratio": -0.786766767501831, "logits/chosen": 0.8990789651870728, "logits/rejected": 0.8992602825164795, "logps/chosen": -2.709434747695923, "logps/rejected": -4.678492546081543, "loss": 0.6759, "nll_loss": 0.5972552299499512, "rewards/accuracies": 0.875, "rewards/chosen": -0.27094346284866333, "rewards/margins": 0.19690579175949097, "rewards/rejected": -0.4678492546081543, "step": 3963 }, { "epoch": 10.85284052019165, "grad_norm": 5.263262748718262, "learning_rate": 4.56986301369863e-07, "log_odds_chosen": 1.6373541355133057, "log_odds_ratio": -0.4528636336326599, "logits/chosen": 1.197375774383545, "logits/rejected": 1.193185806274414, "logps/chosen": -2.1272454261779785, "logps/rejected": -3.638951301574707, "loss": 0.6169, "nll_loss": 0.571656346321106, "rewards/accuracies": 0.75, "rewards/chosen": -0.21272453665733337, "rewards/margins": 0.15117056667804718, "rewards/rejected": -0.36389511823654175, "step": 3964 }, { "epoch": 10.855578370978781, "grad_norm": 2.8678579330444336, "learning_rate": 4.568493150684931e-07, "log_odds_chosen": 2.0236928462982178, "log_odds_ratio": -0.2115919142961502, "logits/chosen": 1.0652813911437988, "logits/rejected": 1.0872106552124023, "logps/chosen": -1.4804017543792725, "logps/rejected": -3.2559924125671387, "loss": 0.5693, "nll_loss": 0.548113226890564, "rewards/accuracies": 1.0, "rewards/chosen": -0.14804017543792725, "rewards/margins": 0.17755909264087677, "rewards/rejected": -0.3255992531776428, "step": 3965 }, { "epoch": 10.858316221765914, "grad_norm": 3.2953405380249023, "learning_rate": 4.5671232876712327e-07, "log_odds_chosen": 1.7375569343566895, "log_odds_ratio": -0.24675799906253815, "logits/chosen": 0.8617849349975586, "logits/rejected": 0.8173549771308899, "logps/chosen": -1.335160493850708, "logps/rejected": -2.849247455596924, "loss": 0.5105, "nll_loss": 0.48578566312789917, "rewards/accuracies": 1.0, "rewards/chosen": -0.13351604342460632, "rewards/margins": 0.15140867233276367, "rewards/rejected": -0.28492471575737, "step": 3966 }, { "epoch": 10.861054072553046, "grad_norm": 3.655334234237671, "learning_rate": 4.565753424657534e-07, "log_odds_chosen": 3.515561580657959, "log_odds_ratio": -0.0693855732679367, "logits/chosen": 1.1780716180801392, "logits/rejected": 1.223220944404602, "logps/chosen": -1.590026617050171, "logps/rejected": -4.857466697692871, "loss": 0.5158, "nll_loss": 0.5088722109794617, "rewards/accuracies": 1.0, "rewards/chosen": -0.1590026617050171, "rewards/margins": 0.326744019985199, "rewards/rejected": -0.48574668169021606, "step": 3967 }, { "epoch": 10.863791923340179, "grad_norm": 3.5552401542663574, "learning_rate": 4.564383561643835e-07, "log_odds_chosen": 1.5898170471191406, "log_odds_ratio": -0.25697118043899536, "logits/chosen": 0.93888258934021, "logits/rejected": 0.8507428765296936, "logps/chosen": -1.8912990093231201, "logps/rejected": -3.2894911766052246, "loss": 0.6504, "nll_loss": 0.6246872544288635, "rewards/accuracies": 1.0, "rewards/chosen": -0.18912990391254425, "rewards/margins": 0.1398192197084427, "rewards/rejected": -0.32894912362098694, "step": 3968 }, { "epoch": 10.86652977412731, "grad_norm": 3.261324405670166, "learning_rate": 4.5630136986301367e-07, "log_odds_chosen": 2.057169198989868, "log_odds_ratio": -0.32272326946258545, "logits/chosen": 0.7690395712852478, "logits/rejected": 0.865947425365448, "logps/chosen": -1.6083104610443115, "logps/rejected": -3.518516778945923, "loss": 0.5782, "nll_loss": 0.5458850860595703, "rewards/accuracies": 0.875, "rewards/chosen": -0.1608310341835022, "rewards/margins": 0.1910206377506256, "rewards/rejected": -0.3518517017364502, "step": 3969 }, { "epoch": 10.869267624914443, "grad_norm": 3.6029322147369385, "learning_rate": 4.561643835616438e-07, "log_odds_chosen": 1.9703648090362549, "log_odds_ratio": -0.3746575713157654, "logits/chosen": 0.9127928614616394, "logits/rejected": 0.8717220425605774, "logps/chosen": -2.1857542991638184, "logps/rejected": -4.079562664031982, "loss": 0.5856, "nll_loss": 0.5481322407722473, "rewards/accuracies": 0.75, "rewards/chosen": -0.21857546269893646, "rewards/margins": 0.18938079476356506, "rewards/rejected": -0.40795624256134033, "step": 3970 }, { "epoch": 10.872005475701574, "grad_norm": 3.4232311248779297, "learning_rate": 4.560273972602739e-07, "log_odds_chosen": 3.8391149044036865, "log_odds_ratio": -0.07836341857910156, "logits/chosen": 1.0808173418045044, "logits/rejected": 1.1027450561523438, "logps/chosen": -1.7724356651306152, "logps/rejected": -5.438913345336914, "loss": 0.6311, "nll_loss": 0.623223066329956, "rewards/accuracies": 1.0, "rewards/chosen": -0.17724357545375824, "rewards/margins": 0.36664775013923645, "rewards/rejected": -0.5438913106918335, "step": 3971 }, { "epoch": 10.874743326488707, "grad_norm": 3.1723551750183105, "learning_rate": 4.558904109589041e-07, "log_odds_chosen": 3.3587183952331543, "log_odds_ratio": -0.1650552749633789, "logits/chosen": 1.1584017276763916, "logits/rejected": 1.1620620489120483, "logps/chosen": -1.7381653785705566, "logps/rejected": -4.742012023925781, "loss": 0.5119, "nll_loss": 0.49535197019577026, "rewards/accuracies": 1.0, "rewards/chosen": -0.1738165318965912, "rewards/margins": 0.30038464069366455, "rewards/rejected": -0.4742012023925781, "step": 3972 }, { "epoch": 10.877481177275838, "grad_norm": 3.3495731353759766, "learning_rate": 4.5575342465753423e-07, "log_odds_chosen": 4.27575159072876, "log_odds_ratio": -0.23011671006679535, "logits/chosen": 0.9761030673980713, "logits/rejected": 0.9768726825714111, "logps/chosen": -2.4591586589813232, "logps/rejected": -6.612799644470215, "loss": 0.6461, "nll_loss": 0.6231114268302917, "rewards/accuracies": 0.875, "rewards/chosen": -0.24591588973999023, "rewards/margins": 0.4153640866279602, "rewards/rejected": -0.6612799763679504, "step": 3973 }, { "epoch": 10.880219028062971, "grad_norm": 4.155946254730225, "learning_rate": 4.556164383561644e-07, "log_odds_chosen": 2.5845987796783447, "log_odds_ratio": -0.2333109974861145, "logits/chosen": 0.6624661087989807, "logits/rejected": 0.6864141225814819, "logps/chosen": -2.2575559616088867, "logps/rejected": -4.6995744705200195, "loss": 0.6393, "nll_loss": 0.6160114407539368, "rewards/accuracies": 1.0, "rewards/chosen": -0.22575558722019196, "rewards/margins": 0.2442018687725067, "rewards/rejected": -0.46995747089385986, "step": 3974 }, { "epoch": 10.882956878850102, "grad_norm": 3.428257942199707, "learning_rate": 4.554794520547945e-07, "log_odds_chosen": 2.2354722023010254, "log_odds_ratio": -0.21299895644187927, "logits/chosen": 1.1464287042617798, "logits/rejected": 1.1819545030593872, "logps/chosen": -2.0149950981140137, "logps/rejected": -3.951870918273926, "loss": 0.5342, "nll_loss": 0.5129168033599854, "rewards/accuracies": 1.0, "rewards/chosen": -0.20149952173233032, "rewards/margins": 0.19368760287761688, "rewards/rejected": -0.395187109708786, "step": 3975 }, { "epoch": 10.885694729637235, "grad_norm": 3.637624740600586, "learning_rate": 4.5534246575342463e-07, "log_odds_chosen": 1.8724327087402344, "log_odds_ratio": -0.27179670333862305, "logits/chosen": 0.8424263000488281, "logits/rejected": 0.8194828033447266, "logps/chosen": -1.6775829792022705, "logps/rejected": -3.373904228210449, "loss": 0.6478, "nll_loss": 0.620657205581665, "rewards/accuracies": 0.875, "rewards/chosen": -0.1677582859992981, "rewards/margins": 0.16963209211826324, "rewards/rejected": -0.3373904228210449, "step": 3976 }, { "epoch": 10.888432580424366, "grad_norm": 3.7714483737945557, "learning_rate": 4.552054794520548e-07, "log_odds_chosen": 2.795022964477539, "log_odds_ratio": -0.1771032065153122, "logits/chosen": 0.9496679306030273, "logits/rejected": 1.0233721733093262, "logps/chosen": -2.139460563659668, "logps/rejected": -4.801949501037598, "loss": 0.5748, "nll_loss": 0.5570803284645081, "rewards/accuracies": 0.875, "rewards/chosen": -0.21394604444503784, "rewards/margins": 0.2662489414215088, "rewards/rejected": -0.480195015668869, "step": 3977 }, { "epoch": 10.8911704312115, "grad_norm": 6.379066467285156, "learning_rate": 4.550684931506849e-07, "log_odds_chosen": 2.371962785720825, "log_odds_ratio": -0.3033096194267273, "logits/chosen": 1.271270513534546, "logits/rejected": 1.2574052810668945, "logps/chosen": -1.8810118436813354, "logps/rejected": -4.0417160987854, "loss": 0.6354, "nll_loss": 0.605073094367981, "rewards/accuracies": 0.875, "rewards/chosen": -0.18810118734836578, "rewards/margins": 0.21607044339179993, "rewards/rejected": -0.40417158603668213, "step": 3978 }, { "epoch": 10.89390828199863, "grad_norm": 3.4826834201812744, "learning_rate": 4.5493150684931503e-07, "log_odds_chosen": 3.4977340698242188, "log_odds_ratio": -0.1895751953125, "logits/chosen": 1.125335931777954, "logits/rejected": 1.1716127395629883, "logps/chosen": -2.3300132751464844, "logps/rejected": -5.759755611419678, "loss": 0.5587, "nll_loss": 0.5397111773490906, "rewards/accuracies": 0.875, "rewards/chosen": -0.23300135135650635, "rewards/margins": 0.3429742753505707, "rewards/rejected": -0.5759755969047546, "step": 3979 }, { "epoch": 10.896646132785763, "grad_norm": 3.504739999771118, "learning_rate": 4.547945205479452e-07, "log_odds_chosen": 2.0158214569091797, "log_odds_ratio": -0.24091656506061554, "logits/chosen": 1.0258204936981201, "logits/rejected": 1.0299171209335327, "logps/chosen": -2.0844452381134033, "logps/rejected": -3.988208532333374, "loss": 0.5697, "nll_loss": 0.5456457734107971, "rewards/accuracies": 1.0, "rewards/chosen": -0.2084445357322693, "rewards/margins": 0.19037634134292603, "rewards/rejected": -0.3988208770751953, "step": 3980 }, { "epoch": 10.899383983572895, "grad_norm": 3.550046920776367, "learning_rate": 4.5465753424657534e-07, "log_odds_chosen": 4.130367755889893, "log_odds_ratio": -0.17696812748908997, "logits/chosen": 1.0138317346572876, "logits/rejected": 1.0473592281341553, "logps/chosen": -2.091381072998047, "logps/rejected": -6.0426106452941895, "loss": 0.4885, "nll_loss": 0.47075897455215454, "rewards/accuracies": 0.875, "rewards/chosen": -0.20913812518119812, "rewards/margins": 0.3951229453086853, "rewards/rejected": -0.604261040687561, "step": 3981 }, { "epoch": 10.902121834360027, "grad_norm": 3.4378244876861572, "learning_rate": 4.5452054794520544e-07, "log_odds_chosen": 3.47510027885437, "log_odds_ratio": -0.24226915836334229, "logits/chosen": 0.6952495574951172, "logits/rejected": 0.7326613664627075, "logps/chosen": -2.179985523223877, "logps/rejected": -5.50473690032959, "loss": 0.5863, "nll_loss": 0.5620366334915161, "rewards/accuracies": 0.875, "rewards/chosen": -0.21799854934215546, "rewards/margins": 0.3324751853942871, "rewards/rejected": -0.550473690032959, "step": 3982 }, { "epoch": 10.904859685147159, "grad_norm": 5.149446964263916, "learning_rate": 4.5438356164383564e-07, "log_odds_chosen": 4.183446407318115, "log_odds_ratio": -0.0944003164768219, "logits/chosen": 0.9364455938339233, "logits/rejected": 0.9184107780456543, "logps/chosen": -2.207624912261963, "logps/rejected": -6.218709468841553, "loss": 0.6954, "nll_loss": 0.6859670877456665, "rewards/accuracies": 1.0, "rewards/chosen": -0.2207624912261963, "rewards/margins": 0.40110844373703003, "rewards/rejected": -0.6218709349632263, "step": 3983 }, { "epoch": 10.907597535934292, "grad_norm": 9.62877082824707, "learning_rate": 4.5424657534246574e-07, "log_odds_chosen": 1.9924124479293823, "log_odds_ratio": -0.47337937355041504, "logits/chosen": 1.1417903900146484, "logits/rejected": 1.1096527576446533, "logps/chosen": -2.3906307220458984, "logps/rejected": -4.153937339782715, "loss": 0.6335, "nll_loss": 0.5861243009567261, "rewards/accuracies": 0.875, "rewards/chosen": -0.2390630692243576, "rewards/margins": 0.17633068561553955, "rewards/rejected": -0.41539376974105835, "step": 3984 }, { "epoch": 10.910335386721425, "grad_norm": 4.6442694664001465, "learning_rate": 4.5410958904109584e-07, "log_odds_chosen": 1.7959449291229248, "log_odds_ratio": -0.42652201652526855, "logits/chosen": 0.988003671169281, "logits/rejected": 1.0427932739257812, "logps/chosen": -2.2868363857269287, "logps/rejected": -3.991342067718506, "loss": 0.7433, "nll_loss": 0.7006685137748718, "rewards/accuracies": 0.75, "rewards/chosen": -0.22868365049362183, "rewards/margins": 0.17045056819915771, "rewards/rejected": -0.39913421869277954, "step": 3985 }, { "epoch": 10.913073237508556, "grad_norm": 4.231143951416016, "learning_rate": 4.53972602739726e-07, "log_odds_chosen": 1.9688999652862549, "log_odds_ratio": -0.2509823143482208, "logits/chosen": 0.8514626622200012, "logits/rejected": 0.811380922794342, "logps/chosen": -2.0606002807617188, "logps/rejected": -3.918696641921997, "loss": 0.614, "nll_loss": 0.5888562798500061, "rewards/accuracies": 1.0, "rewards/chosen": -0.2060600370168686, "rewards/margins": 0.1858096420764923, "rewards/rejected": -0.3918696641921997, "step": 3986 }, { "epoch": 10.915811088295689, "grad_norm": 3.504685878753662, "learning_rate": 4.5383561643835615e-07, "log_odds_chosen": 3.4675850868225098, "log_odds_ratio": -0.07683517783880234, "logits/chosen": 1.1442843675613403, "logits/rejected": 1.1556291580200195, "logps/chosen": -1.6509466171264648, "logps/rejected": -4.755890846252441, "loss": 0.6321, "nll_loss": 0.6244204640388489, "rewards/accuracies": 1.0, "rewards/chosen": -0.16509465873241425, "rewards/margins": 0.31049442291259766, "rewards/rejected": -0.4755890667438507, "step": 3987 }, { "epoch": 10.91854893908282, "grad_norm": 4.093649864196777, "learning_rate": 4.536986301369863e-07, "log_odds_chosen": 2.1197590827941895, "log_odds_ratio": -0.3373614549636841, "logits/chosen": 0.6410534381866455, "logits/rejected": 0.5699242353439331, "logps/chosen": -1.4671562910079956, "logps/rejected": -3.4384708404541016, "loss": 0.5799, "nll_loss": 0.5462014675140381, "rewards/accuracies": 1.0, "rewards/chosen": -0.14671564102172852, "rewards/margins": 0.19713148474693298, "rewards/rejected": -0.3438470959663391, "step": 3988 }, { "epoch": 10.921286789869953, "grad_norm": 3.466287136077881, "learning_rate": 4.535616438356164e-07, "log_odds_chosen": 1.2381312847137451, "log_odds_ratio": -0.3169635534286499, "logits/chosen": 0.9029029607772827, "logits/rejected": 0.8702285289764404, "logps/chosen": -1.8510537147521973, "logps/rejected": -2.926391839981079, "loss": 0.572, "nll_loss": 0.5402873754501343, "rewards/accuracies": 0.875, "rewards/chosen": -0.18510538339614868, "rewards/margins": 0.10753381997346878, "rewards/rejected": -0.29263919591903687, "step": 3989 }, { "epoch": 10.924024640657084, "grad_norm": 4.115396022796631, "learning_rate": 4.534246575342466e-07, "log_odds_chosen": 1.4018969535827637, "log_odds_ratio": -0.3804413080215454, "logits/chosen": 0.9224982261657715, "logits/rejected": 0.8336309194564819, "logps/chosen": -1.2578058242797852, "logps/rejected": -2.38295841217041, "loss": 0.5653, "nll_loss": 0.5272172093391418, "rewards/accuracies": 1.0, "rewards/chosen": -0.12578058242797852, "rewards/margins": 0.11251527070999146, "rewards/rejected": -0.23829585313796997, "step": 3990 }, { "epoch": 10.926762491444217, "grad_norm": 13.86635684967041, "learning_rate": 4.532876712328767e-07, "log_odds_chosen": 1.1875990629196167, "log_odds_ratio": -0.52066969871521, "logits/chosen": 1.1878904104232788, "logits/rejected": 1.050567388534546, "logps/chosen": -2.768087148666382, "logps/rejected": -3.8226993083953857, "loss": 0.7068, "nll_loss": 0.6546914577484131, "rewards/accuracies": 0.75, "rewards/chosen": -0.2768087089061737, "rewards/margins": 0.10546121746301651, "rewards/rejected": -0.382269948720932, "step": 3991 }, { "epoch": 10.929500342231348, "grad_norm": 4.185695171356201, "learning_rate": 4.531506849315068e-07, "log_odds_chosen": 1.4453378915786743, "log_odds_ratio": -0.7226681709289551, "logits/chosen": 0.8116583824157715, "logits/rejected": 0.8499774932861328, "logps/chosen": -2.116424560546875, "logps/rejected": -3.467426061630249, "loss": 0.5878, "nll_loss": 0.5155575275421143, "rewards/accuracies": 0.75, "rewards/chosen": -0.21164244413375854, "rewards/margins": 0.13510018587112427, "rewards/rejected": -0.3467426300048828, "step": 3992 }, { "epoch": 10.932238193018481, "grad_norm": 3.4937219619750977, "learning_rate": 4.5301369863013695e-07, "log_odds_chosen": 4.316988945007324, "log_odds_ratio": -0.07595373690128326, "logits/chosen": 1.2042875289916992, "logits/rejected": 1.191198706626892, "logps/chosen": -2.2478437423706055, "logps/rejected": -6.363707542419434, "loss": 0.6161, "nll_loss": 0.608532190322876, "rewards/accuracies": 1.0, "rewards/chosen": -0.22478438913822174, "rewards/margins": 0.41158634424209595, "rewards/rejected": -0.6363707184791565, "step": 3993 }, { "epoch": 10.934976043805612, "grad_norm": 5.11115026473999, "learning_rate": 4.528767123287671e-07, "log_odds_chosen": 2.528034210205078, "log_odds_ratio": -0.3045123517513275, "logits/chosen": 0.7967591285705566, "logits/rejected": 0.6994569301605225, "logps/chosen": -2.2101492881774902, "logps/rejected": -4.621496677398682, "loss": 0.7003, "nll_loss": 0.6698273420333862, "rewards/accuracies": 0.875, "rewards/chosen": -0.22101494669914246, "rewards/margins": 0.24113474786281586, "rewards/rejected": -0.4621497094631195, "step": 3994 }, { "epoch": 10.937713894592745, "grad_norm": 3.6880946159362793, "learning_rate": 4.5273972602739726e-07, "log_odds_chosen": 1.8678555488586426, "log_odds_ratio": -0.22614207863807678, "logits/chosen": 0.9111765027046204, "logits/rejected": 0.9101690053939819, "logps/chosen": -2.0911338329315186, "logps/rejected": -3.811415195465088, "loss": 0.5436, "nll_loss": 0.5209792256355286, "rewards/accuracies": 1.0, "rewards/chosen": -0.20911338925361633, "rewards/margins": 0.17202816903591156, "rewards/rejected": -0.3811415433883667, "step": 3995 }, { "epoch": 10.940451745379876, "grad_norm": 4.446686744689941, "learning_rate": 4.5260273972602735e-07, "log_odds_chosen": 4.068179130554199, "log_odds_ratio": -0.3310611844062805, "logits/chosen": 0.9859323501586914, "logits/rejected": 0.9338988661766052, "logps/chosen": -2.4734582901000977, "logps/rejected": -6.431804656982422, "loss": 0.85, "nll_loss": 0.816926121711731, "rewards/accuracies": 0.875, "rewards/chosen": -0.24734583497047424, "rewards/margins": 0.39583465456962585, "rewards/rejected": -0.6431804895401001, "step": 3996 }, { "epoch": 10.94318959616701, "grad_norm": 3.89205265045166, "learning_rate": 4.5246575342465756e-07, "log_odds_chosen": 3.7284674644470215, "log_odds_ratio": -0.31320926547050476, "logits/chosen": 1.090806245803833, "logits/rejected": 1.1245594024658203, "logps/chosen": -2.1772661209106445, "logps/rejected": -5.723475456237793, "loss": 0.652, "nll_loss": 0.620713472366333, "rewards/accuracies": 0.875, "rewards/chosen": -0.21772660315036774, "rewards/margins": 0.35462093353271484, "rewards/rejected": -0.5723475217819214, "step": 3997 }, { "epoch": 10.94592744695414, "grad_norm": 2.877896308898926, "learning_rate": 4.5232876712328766e-07, "log_odds_chosen": 2.6043460369110107, "log_odds_ratio": -0.27877017855644226, "logits/chosen": 1.281319499015808, "logits/rejected": 1.2180683612823486, "logps/chosen": -1.7175778150558472, "logps/rejected": -4.17681884765625, "loss": 0.5589, "nll_loss": 0.5310471057891846, "rewards/accuracies": 0.875, "rewards/chosen": -0.171757772564888, "rewards/margins": 0.24592413008213043, "rewards/rejected": -0.41768190264701843, "step": 3998 }, { "epoch": 10.948665297741274, "grad_norm": 5.062335014343262, "learning_rate": 4.5219178082191776e-07, "log_odds_chosen": 2.9486982822418213, "log_odds_ratio": -0.2556057572364807, "logits/chosen": 1.0204521417617798, "logits/rejected": 1.0372508764266968, "logps/chosen": -2.369711399078369, "logps/rejected": -5.212132930755615, "loss": 0.7772, "nll_loss": 0.751632809638977, "rewards/accuracies": 0.875, "rewards/chosen": -0.2369711548089981, "rewards/margins": 0.2842421531677246, "rewards/rejected": -0.5212132930755615, "step": 3999 }, { "epoch": 10.951403148528405, "grad_norm": 4.632683753967285, "learning_rate": 4.520547945205479e-07, "log_odds_chosen": 2.6059961318969727, "log_odds_ratio": -0.18076097965240479, "logits/chosen": 1.1334929466247559, "logits/rejected": 1.0965650081634521, "logps/chosen": -1.6656087636947632, "logps/rejected": -4.018435001373291, "loss": 0.5433, "nll_loss": 0.5252529382705688, "rewards/accuracies": 1.0, "rewards/chosen": -0.16656087338924408, "rewards/margins": 0.23528261482715607, "rewards/rejected": -0.40184345841407776, "step": 4000 }, { "epoch": 10.954140999315538, "grad_norm": 5.697526931762695, "learning_rate": 4.5191780821917806e-07, "log_odds_chosen": 3.3944671154022217, "log_odds_ratio": -0.22901028394699097, "logits/chosen": 1.0127601623535156, "logits/rejected": 0.9564743638038635, "logps/chosen": -1.9125449657440186, "logps/rejected": -5.057211875915527, "loss": 0.5043, "nll_loss": 0.48135045170783997, "rewards/accuracies": 0.875, "rewards/chosen": -0.19125451147556305, "rewards/margins": 0.3144667148590088, "rewards/rejected": -0.5057212114334106, "step": 4001 }, { "epoch": 10.956878850102669, "grad_norm": 3.868574619293213, "learning_rate": 4.517808219178082e-07, "log_odds_chosen": 2.080322027206421, "log_odds_ratio": -0.21032768487930298, "logits/chosen": 0.8273828029632568, "logits/rejected": 0.6821260452270508, "logps/chosen": -1.4484038352966309, "logps/rejected": -3.3153624534606934, "loss": 0.5669, "nll_loss": 0.5459167957305908, "rewards/accuracies": 1.0, "rewards/chosen": -0.14484038949012756, "rewards/margins": 0.1866958737373352, "rewards/rejected": -0.33153626322746277, "step": 4002 }, { "epoch": 10.959616700889802, "grad_norm": 3.364743709564209, "learning_rate": 4.516438356164383e-07, "log_odds_chosen": 2.820749282836914, "log_odds_ratio": -0.09530690312385559, "logits/chosen": 1.071681261062622, "logits/rejected": 1.0973553657531738, "logps/chosen": -2.3757848739624023, "logps/rejected": -5.025312900543213, "loss": 0.6215, "nll_loss": 0.6119193434715271, "rewards/accuracies": 1.0, "rewards/chosen": -0.23757849633693695, "rewards/margins": 0.26495277881622314, "rewards/rejected": -0.5025312900543213, "step": 4003 }, { "epoch": 10.962354551676933, "grad_norm": 3.9502720832824707, "learning_rate": 4.515068493150685e-07, "log_odds_chosen": 1.6258549690246582, "log_odds_ratio": -0.23575013875961304, "logits/chosen": 0.9468117356300354, "logits/rejected": 0.9131162166595459, "logps/chosen": -2.100994825363159, "logps/rejected": -3.615666389465332, "loss": 0.5933, "nll_loss": 0.5697697401046753, "rewards/accuracies": 1.0, "rewards/chosen": -0.2100994884967804, "rewards/margins": 0.15146715939044952, "rewards/rejected": -0.3615666627883911, "step": 4004 }, { "epoch": 10.965092402464066, "grad_norm": 3.937727451324463, "learning_rate": 4.513698630136986e-07, "log_odds_chosen": 3.1252756118774414, "log_odds_ratio": -0.15713685750961304, "logits/chosen": 0.7821594476699829, "logits/rejected": 0.7427628636360168, "logps/chosen": -1.352554440498352, "logps/rejected": -4.209189414978027, "loss": 0.5381, "nll_loss": 0.5224111080169678, "rewards/accuracies": 1.0, "rewards/chosen": -0.13525544106960297, "rewards/margins": 0.28566351532936096, "rewards/rejected": -0.4209190011024475, "step": 4005 }, { "epoch": 10.967830253251197, "grad_norm": 3.531982898712158, "learning_rate": 4.512328767123287e-07, "log_odds_chosen": 2.212310552597046, "log_odds_ratio": -0.361727237701416, "logits/chosen": 1.3006107807159424, "logits/rejected": 1.3241640329360962, "logps/chosen": -2.7734477519989014, "logps/rejected": -4.945537090301514, "loss": 0.6529, "nll_loss": 0.6167325973510742, "rewards/accuracies": 0.875, "rewards/chosen": -0.2773447632789612, "rewards/margins": 0.21720892190933228, "rewards/rejected": -0.49455368518829346, "step": 4006 }, { "epoch": 10.97056810403833, "grad_norm": 3.6845760345458984, "learning_rate": 4.5109589041095887e-07, "log_odds_chosen": 3.166463851928711, "log_odds_ratio": -0.11150669306516647, "logits/chosen": 1.0177496671676636, "logits/rejected": 1.0278704166412354, "logps/chosen": -2.062868595123291, "logps/rejected": -5.061793327331543, "loss": 0.6044, "nll_loss": 0.5932931900024414, "rewards/accuracies": 1.0, "rewards/chosen": -0.20628687739372253, "rewards/margins": 0.29989248514175415, "rewards/rejected": -0.5061793327331543, "step": 4007 }, { "epoch": 10.973305954825461, "grad_norm": 3.3962507247924805, "learning_rate": 4.50958904109589e-07, "log_odds_chosen": 3.775848388671875, "log_odds_ratio": -0.16737155616283417, "logits/chosen": 1.1096769571304321, "logits/rejected": 1.0458011627197266, "logps/chosen": -1.5571682453155518, "logps/rejected": -5.088018894195557, "loss": 0.5837, "nll_loss": 0.566964864730835, "rewards/accuracies": 1.0, "rewards/chosen": -0.15571682155132294, "rewards/margins": 0.3530850410461426, "rewards/rejected": -0.5088018774986267, "step": 4008 }, { "epoch": 10.976043805612594, "grad_norm": 3.757685899734497, "learning_rate": 4.5082191780821917e-07, "log_odds_chosen": 1.6311064958572388, "log_odds_ratio": -0.24959556758403778, "logits/chosen": 0.726954460144043, "logits/rejected": 0.6428927779197693, "logps/chosen": -1.484998106956482, "logps/rejected": -2.913726329803467, "loss": 0.5136, "nll_loss": 0.48862481117248535, "rewards/accuracies": 1.0, "rewards/chosen": -0.14849981665611267, "rewards/margins": 0.14287281036376953, "rewards/rejected": -0.2913725972175598, "step": 4009 }, { "epoch": 10.978781656399725, "grad_norm": 9.21694564819336, "learning_rate": 4.5068493150684927e-07, "log_odds_chosen": 1.2755755186080933, "log_odds_ratio": -0.5606070756912231, "logits/chosen": 1.130688190460205, "logits/rejected": 1.057273268699646, "logps/chosen": -2.6655068397521973, "logps/rejected": -3.793322801589966, "loss": 0.7062, "nll_loss": 0.6501279473304749, "rewards/accuracies": 0.75, "rewards/chosen": -0.2665506601333618, "rewards/margins": 0.11278160661458969, "rewards/rejected": -0.3793322741985321, "step": 4010 }, { "epoch": 10.981519507186858, "grad_norm": 3.5538387298583984, "learning_rate": 4.505479452054795e-07, "log_odds_chosen": 3.731135845184326, "log_odds_ratio": -0.2097056657075882, "logits/chosen": 1.052174687385559, "logits/rejected": 1.010454535484314, "logps/chosen": -1.6656440496444702, "logps/rejected": -5.226997375488281, "loss": 0.4788, "nll_loss": 0.45785757899284363, "rewards/accuracies": 1.0, "rewards/chosen": -0.16656440496444702, "rewards/margins": 0.3561353087425232, "rewards/rejected": -0.5226997137069702, "step": 4011 }, { "epoch": 10.984257357973991, "grad_norm": 4.180662155151367, "learning_rate": 4.504109589041096e-07, "log_odds_chosen": 2.0844099521636963, "log_odds_ratio": -0.3500792384147644, "logits/chosen": 1.002403974533081, "logits/rejected": 1.1428065299987793, "logps/chosen": -2.4146809577941895, "logps/rejected": -4.336657524108887, "loss": 0.6593, "nll_loss": 0.6242672801017761, "rewards/accuracies": 0.875, "rewards/chosen": -0.24146810173988342, "rewards/margins": 0.19219765067100525, "rewards/rejected": -0.43366575241088867, "step": 4012 }, { "epoch": 10.986995208761122, "grad_norm": 3.357083559036255, "learning_rate": 4.502739726027397e-07, "log_odds_chosen": 3.847476005554199, "log_odds_ratio": -0.1758953481912613, "logits/chosen": 1.0382457971572876, "logits/rejected": 1.0372507572174072, "logps/chosen": -1.4167706966400146, "logps/rejected": -5.0196380615234375, "loss": 0.5335, "nll_loss": 0.5159485340118408, "rewards/accuracies": 1.0, "rewards/chosen": -0.14167708158493042, "rewards/margins": 0.36028677225112915, "rewards/rejected": -0.5019638538360596, "step": 4013 }, { "epoch": 10.989733059548255, "grad_norm": 3.3205554485321045, "learning_rate": 4.501369863013699e-07, "log_odds_chosen": 3.964991569519043, "log_odds_ratio": -0.14793603122234344, "logits/chosen": 0.9143495559692383, "logits/rejected": 0.923214316368103, "logps/chosen": -1.681506872177124, "logps/rejected": -5.4263596534729, "loss": 0.5546, "nll_loss": 0.5397959351539612, "rewards/accuracies": 1.0, "rewards/chosen": -0.16815069317817688, "rewards/margins": 0.37448522448539734, "rewards/rejected": -0.5426359176635742, "step": 4014 }, { "epoch": 10.992470910335387, "grad_norm": 4.2027130126953125, "learning_rate": 4.5e-07, "log_odds_chosen": 2.238051414489746, "log_odds_ratio": -0.250278115272522, "logits/chosen": 1.2929532527923584, "logits/rejected": 1.3317434787750244, "logps/chosen": -1.9836602210998535, "logps/rejected": -4.050245761871338, "loss": 0.5496, "nll_loss": 0.5245466232299805, "rewards/accuracies": 0.875, "rewards/chosen": -0.19836601614952087, "rewards/margins": 0.20665857195854187, "rewards/rejected": -0.40502458810806274, "step": 4015 }, { "epoch": 10.99520876112252, "grad_norm": 3.779290199279785, "learning_rate": 4.4986301369863013e-07, "log_odds_chosen": 2.236128568649292, "log_odds_ratio": -0.24381110072135925, "logits/chosen": 0.8846771717071533, "logits/rejected": 0.7773444056510925, "logps/chosen": -1.2432355880737305, "logps/rejected": -3.235809803009033, "loss": 0.5665, "nll_loss": 0.5421268939971924, "rewards/accuracies": 1.0, "rewards/chosen": -0.12432355433702469, "rewards/margins": 0.19925743341445923, "rewards/rejected": -0.3235809803009033, "step": 4016 }, { "epoch": 10.99794661190965, "grad_norm": 12.250397682189941, "learning_rate": 4.4972602739726023e-07, "log_odds_chosen": 0.6697204113006592, "log_odds_ratio": -0.9330980181694031, "logits/chosen": 0.9360303282737732, "logits/rejected": 0.8925784826278687, "logps/chosen": -3.1420366764068604, "logps/rejected": -3.7214276790618896, "loss": 0.636, "nll_loss": 0.542648196220398, "rewards/accuracies": 0.625, "rewards/chosen": -0.314203679561615, "rewards/margins": 0.0579390823841095, "rewards/rejected": -0.3721427619457245, "step": 4017 }, { "epoch": 11.000684462696784, "grad_norm": 3.0927348136901855, "learning_rate": 4.495890410958904e-07, "log_odds_chosen": 2.3212811946868896, "log_odds_ratio": -0.22088715434074402, "logits/chosen": 0.8445045948028564, "logits/rejected": 0.7502391338348389, "logps/chosen": -1.7962546348571777, "logps/rejected": -3.94132661819458, "loss": 0.5418, "nll_loss": 0.5197226405143738, "rewards/accuracies": 1.0, "rewards/chosen": -0.17962546646595, "rewards/margins": 0.21450719237327576, "rewards/rejected": -0.39413267374038696, "step": 4018 }, { "epoch": 11.003422313483915, "grad_norm": 3.719496250152588, "learning_rate": 4.4945205479452054e-07, "log_odds_chosen": 1.8344674110412598, "log_odds_ratio": -0.3120957314968109, "logits/chosen": 1.0141820907592773, "logits/rejected": 1.0152305364608765, "logps/chosen": -1.6156642436981201, "logps/rejected": -3.295382261276245, "loss": 0.6627, "nll_loss": 0.631527841091156, "rewards/accuracies": 1.0, "rewards/chosen": -0.16156643629074097, "rewards/margins": 0.16797181963920593, "rewards/rejected": -0.3295382559299469, "step": 4019 }, { "epoch": 11.006160164271048, "grad_norm": 3.7444255352020264, "learning_rate": 4.4931506849315063e-07, "log_odds_chosen": 2.4277873039245605, "log_odds_ratio": -0.23922234773635864, "logits/chosen": 1.2220193147659302, "logits/rejected": 1.0712041854858398, "logps/chosen": -1.312219500541687, "logps/rejected": -3.4213733673095703, "loss": 0.4621, "nll_loss": 0.4382190406322479, "rewards/accuracies": 0.875, "rewards/chosen": -0.1312219500541687, "rewards/margins": 0.21091537177562714, "rewards/rejected": -0.34213733673095703, "step": 4020 }, { "epoch": 11.008898015058179, "grad_norm": 4.966752529144287, "learning_rate": 4.4917808219178084e-07, "log_odds_chosen": 3.7280311584472656, "log_odds_ratio": -0.3954043984413147, "logits/chosen": 1.0987002849578857, "logits/rejected": 1.1355807781219482, "logps/chosen": -2.279273509979248, "logps/rejected": -5.771482467651367, "loss": 0.7017, "nll_loss": 0.6622093915939331, "rewards/accuracies": 0.875, "rewards/chosen": -0.2279273420572281, "rewards/margins": 0.349220871925354, "rewards/rejected": -0.5771481990814209, "step": 4021 }, { "epoch": 11.011635865845312, "grad_norm": 3.4112274646759033, "learning_rate": 4.4904109589041094e-07, "log_odds_chosen": 5.2805962562561035, "log_odds_ratio": -0.10630203038454056, "logits/chosen": 0.938823401927948, "logits/rejected": 0.954414427280426, "logps/chosen": -2.2047719955444336, "logps/rejected": -7.3692851066589355, "loss": 0.6123, "nll_loss": 0.6017040014266968, "rewards/accuracies": 1.0, "rewards/chosen": -0.22047720849514008, "rewards/margins": 0.516451358795166, "rewards/rejected": -0.7369285821914673, "step": 4022 }, { "epoch": 11.014373716632443, "grad_norm": 3.2849814891815186, "learning_rate": 4.489041095890411e-07, "log_odds_chosen": 1.9021363258361816, "log_odds_ratio": -0.2711092233657837, "logits/chosen": 0.8497856855392456, "logits/rejected": 0.814761757850647, "logps/chosen": -1.9574155807495117, "logps/rejected": -3.730323076248169, "loss": 0.5796, "nll_loss": 0.5525223016738892, "rewards/accuracies": 0.75, "rewards/chosen": -0.19574156403541565, "rewards/margins": 0.17729073762893677, "rewards/rejected": -0.3730323016643524, "step": 4023 }, { "epoch": 11.017111567419576, "grad_norm": 5.44974946975708, "learning_rate": 4.487671232876712e-07, "log_odds_chosen": 2.0437259674072266, "log_odds_ratio": -0.22052490711212158, "logits/chosen": 0.6991690397262573, "logits/rejected": 0.6053276062011719, "logps/chosen": -2.366457462310791, "logps/rejected": -4.267326354980469, "loss": 0.8203, "nll_loss": 0.7982205152511597, "rewards/accuracies": 1.0, "rewards/chosen": -0.23664574325084686, "rewards/margins": 0.19008693099021912, "rewards/rejected": -0.4267326593399048, "step": 4024 }, { "epoch": 11.019849418206707, "grad_norm": 7.422955513000488, "learning_rate": 4.4863013698630134e-07, "log_odds_chosen": 1.8969752788543701, "log_odds_ratio": -0.33189648389816284, "logits/chosen": 0.9921488761901855, "logits/rejected": 0.9927588701248169, "logps/chosen": -2.6326537132263184, "logps/rejected": -4.467106819152832, "loss": 0.6912, "nll_loss": 0.6579781770706177, "rewards/accuracies": 0.875, "rewards/chosen": -0.26326537132263184, "rewards/margins": 0.18344533443450928, "rewards/rejected": -0.4467107057571411, "step": 4025 }, { "epoch": 11.02258726899384, "grad_norm": 3.3325083255767822, "learning_rate": 4.484931506849315e-07, "log_odds_chosen": 3.7916834354400635, "log_odds_ratio": -0.2096318006515503, "logits/chosen": 1.267375111579895, "logits/rejected": 1.276605486869812, "logps/chosen": -2.1292028427124023, "logps/rejected": -5.8249359130859375, "loss": 0.5646, "nll_loss": 0.54368656873703, "rewards/accuracies": 0.875, "rewards/chosen": -0.21292027831077576, "rewards/margins": 0.36957329511642456, "rewards/rejected": -0.5824935436248779, "step": 4026 }, { "epoch": 11.025325119780971, "grad_norm": 3.5645487308502197, "learning_rate": 4.483561643835616e-07, "log_odds_chosen": 2.35404109954834, "log_odds_ratio": -0.38066965341567993, "logits/chosen": 0.9035511016845703, "logits/rejected": 0.8713600635528564, "logps/chosen": -2.482893943786621, "logps/rejected": -4.715436935424805, "loss": 0.5736, "nll_loss": 0.5354958772659302, "rewards/accuracies": 0.875, "rewards/chosen": -0.24828940629959106, "rewards/margins": 0.2232542783021927, "rewards/rejected": -0.47154369950294495, "step": 4027 }, { "epoch": 11.028062970568104, "grad_norm": 7.955624580383301, "learning_rate": 4.482191780821918e-07, "log_odds_chosen": 2.185835838317871, "log_odds_ratio": -0.39519286155700684, "logits/chosen": 1.1611201763153076, "logits/rejected": 1.1941328048706055, "logps/chosen": -2.476191997528076, "logps/rejected": -4.485796928405762, "loss": 0.5594, "nll_loss": 0.5198981165885925, "rewards/accuracies": 0.875, "rewards/chosen": -0.24761921167373657, "rewards/margins": 0.20096050202846527, "rewards/rejected": -0.44857972860336304, "step": 4028 }, { "epoch": 11.030800821355236, "grad_norm": 3.688586950302124, "learning_rate": 4.480821917808219e-07, "log_odds_chosen": 1.354475975036621, "log_odds_ratio": -0.4302363395690918, "logits/chosen": 1.1177746057510376, "logits/rejected": 1.117912769317627, "logps/chosen": -1.8597573041915894, "logps/rejected": -3.146962881088257, "loss": 0.5307, "nll_loss": 0.4877236783504486, "rewards/accuracies": 0.625, "rewards/chosen": -0.18597573041915894, "rewards/margins": 0.12872055172920227, "rewards/rejected": -0.3146962821483612, "step": 4029 }, { "epoch": 11.033538672142368, "grad_norm": 3.7244584560394287, "learning_rate": 4.4794520547945205e-07, "log_odds_chosen": 1.8965052366256714, "log_odds_ratio": -0.39318180084228516, "logits/chosen": 0.7154830694198608, "logits/rejected": 0.7236642837524414, "logps/chosen": -1.9141273498535156, "logps/rejected": -3.6923036575317383, "loss": 0.7127, "nll_loss": 0.6734297275543213, "rewards/accuracies": 0.75, "rewards/chosen": -0.19141273200511932, "rewards/margins": 0.17781761288642883, "rewards/rejected": -0.36923038959503174, "step": 4030 }, { "epoch": 11.0362765229295, "grad_norm": 4.606873035430908, "learning_rate": 4.4780821917808215e-07, "log_odds_chosen": 1.3823165893554688, "log_odds_ratio": -0.42563340067863464, "logits/chosen": 0.8487762212753296, "logits/rejected": 0.7883486747741699, "logps/chosen": -1.7426623106002808, "logps/rejected": -2.9624757766723633, "loss": 0.5171, "nll_loss": 0.4744877815246582, "rewards/accuracies": 0.75, "rewards/chosen": -0.17426623404026031, "rewards/margins": 0.12198136001825333, "rewards/rejected": -0.29624760150909424, "step": 4031 }, { "epoch": 11.039014373716633, "grad_norm": 5.127861976623535, "learning_rate": 4.476712328767123e-07, "log_odds_chosen": 1.3776700496673584, "log_odds_ratio": -0.31100159883499146, "logits/chosen": 0.7468404173851013, "logits/rejected": 0.7432487607002258, "logps/chosen": -2.581380844116211, "logps/rejected": -3.862760543823242, "loss": 0.7982, "nll_loss": 0.767055869102478, "rewards/accuracies": 0.875, "rewards/chosen": -0.2581380605697632, "rewards/margins": 0.1281379610300064, "rewards/rejected": -0.3862760066986084, "step": 4032 }, { "epoch": 11.041752224503764, "grad_norm": 4.815066337585449, "learning_rate": 4.4753424657534245e-07, "log_odds_chosen": 2.644014358520508, "log_odds_ratio": -0.4690210819244385, "logits/chosen": 0.7067572474479675, "logits/rejected": 0.7909504771232605, "logps/chosen": -2.2123091220855713, "logps/rejected": -4.7476806640625, "loss": 0.5704, "nll_loss": 0.5234596133232117, "rewards/accuracies": 0.75, "rewards/chosen": -0.2212309092283249, "rewards/margins": 0.253537118434906, "rewards/rejected": -0.4747680425643921, "step": 4033 }, { "epoch": 11.044490075290897, "grad_norm": 3.058722734451294, "learning_rate": 4.4739726027397255e-07, "log_odds_chosen": 1.1703529357910156, "log_odds_ratio": -0.3393647074699402, "logits/chosen": 0.960903525352478, "logits/rejected": 0.8873339295387268, "logps/chosen": -1.6422176361083984, "logps/rejected": -2.5972001552581787, "loss": 0.6027, "nll_loss": 0.5687275528907776, "rewards/accuracies": 0.875, "rewards/chosen": -0.16422176361083984, "rewards/margins": 0.09549826383590698, "rewards/rejected": -0.2597200274467468, "step": 4034 }, { "epoch": 11.047227926078028, "grad_norm": 3.708533763885498, "learning_rate": 4.4726027397260276e-07, "log_odds_chosen": 4.745699405670166, "log_odds_ratio": -0.11202843487262726, "logits/chosen": 0.9051743149757385, "logits/rejected": 0.8730192184448242, "logps/chosen": -1.446427345275879, "logps/rejected": -5.864041328430176, "loss": 0.5493, "nll_loss": 0.5380650758743286, "rewards/accuracies": 1.0, "rewards/chosen": -0.14464275538921356, "rewards/margins": 0.44176143407821655, "rewards/rejected": -0.5864041447639465, "step": 4035 }, { "epoch": 11.049965776865161, "grad_norm": 3.666173219680786, "learning_rate": 4.4712328767123286e-07, "log_odds_chosen": 1.5511486530303955, "log_odds_ratio": -0.32206493616104126, "logits/chosen": 1.0312706232070923, "logits/rejected": 0.9456727504730225, "logps/chosen": -1.3133981227874756, "logps/rejected": -2.6465530395507812, "loss": 0.5393, "nll_loss": 0.5070652961730957, "rewards/accuracies": 1.0, "rewards/chosen": -0.13133981823921204, "rewards/margins": 0.13331550359725952, "rewards/rejected": -0.26465529203414917, "step": 4036 }, { "epoch": 11.052703627652292, "grad_norm": 6.141946792602539, "learning_rate": 4.46986301369863e-07, "log_odds_chosen": 2.146475315093994, "log_odds_ratio": -0.2851622998714447, "logits/chosen": 0.7844077348709106, "logits/rejected": 0.7173985838890076, "logps/chosen": -1.6666409969329834, "logps/rejected": -3.5526063442230225, "loss": 0.5308, "nll_loss": 0.5023082494735718, "rewards/accuracies": 0.875, "rewards/chosen": -0.16666410863399506, "rewards/margins": 0.18859654664993286, "rewards/rejected": -0.3552606701850891, "step": 4037 }, { "epoch": 11.055441478439425, "grad_norm": 3.5454108715057373, "learning_rate": 4.468493150684931e-07, "log_odds_chosen": 1.4579650163650513, "log_odds_ratio": -0.33562248945236206, "logits/chosen": 1.1714251041412354, "logits/rejected": 1.0539236068725586, "logps/chosen": -1.4433553218841553, "logps/rejected": -2.7100932598114014, "loss": 0.5218, "nll_loss": 0.4882844090461731, "rewards/accuracies": 0.875, "rewards/chosen": -0.14433553814888, "rewards/margins": 0.12667378783226013, "rewards/rejected": -0.27100932598114014, "step": 4038 }, { "epoch": 11.058179329226558, "grad_norm": 3.436988115310669, "learning_rate": 4.4671232876712326e-07, "log_odds_chosen": 2.8887147903442383, "log_odds_ratio": -0.21573366224765778, "logits/chosen": 0.6470590829849243, "logits/rejected": 0.5632632970809937, "logps/chosen": -1.8504905700683594, "logps/rejected": -4.5893988609313965, "loss": 0.5694, "nll_loss": 0.5478649139404297, "rewards/accuracies": 1.0, "rewards/chosen": -0.18504904210567474, "rewards/margins": 0.2738908529281616, "rewards/rejected": -0.45893990993499756, "step": 4039 }, { "epoch": 11.06091718001369, "grad_norm": 6.315893650054932, "learning_rate": 4.465753424657534e-07, "log_odds_chosen": 2.4595565795898438, "log_odds_ratio": -0.22319531440734863, "logits/chosen": 0.9364933967590332, "logits/rejected": 0.883469820022583, "logps/chosen": -1.6067936420440674, "logps/rejected": -3.7988762855529785, "loss": 0.5861, "nll_loss": 0.5637444853782654, "rewards/accuracies": 1.0, "rewards/chosen": -0.16067937016487122, "rewards/margins": 0.2192082554101944, "rewards/rejected": -0.3798876106739044, "step": 4040 }, { "epoch": 11.063655030800822, "grad_norm": 3.9601447582244873, "learning_rate": 4.464383561643835e-07, "log_odds_chosen": 2.864344835281372, "log_odds_ratio": -0.1557854413986206, "logits/chosen": 1.2089059352874756, "logits/rejected": 1.2549926042556763, "logps/chosen": -2.2226881980895996, "logps/rejected": -4.956060886383057, "loss": 0.7035, "nll_loss": 0.6879532933235168, "rewards/accuracies": 1.0, "rewards/chosen": -0.22226883471012115, "rewards/margins": 0.2733372449874878, "rewards/rejected": -0.49560606479644775, "step": 4041 }, { "epoch": 11.066392881587953, "grad_norm": 7.262973308563232, "learning_rate": 4.463013698630137e-07, "log_odds_chosen": 1.920475959777832, "log_odds_ratio": -0.6460530757904053, "logits/chosen": 1.2333619594573975, "logits/rejected": 1.2050244808197021, "logps/chosen": -2.566131591796875, "logps/rejected": -4.357271671295166, "loss": 0.6616, "nll_loss": 0.5970286726951599, "rewards/accuracies": 0.875, "rewards/chosen": -0.256613165140152, "rewards/margins": 0.17911401391029358, "rewards/rejected": -0.43572717905044556, "step": 4042 }, { "epoch": 11.069130732375086, "grad_norm": 3.513333559036255, "learning_rate": 4.461643835616438e-07, "log_odds_chosen": 2.1259281635284424, "log_odds_ratio": -0.16979143023490906, "logits/chosen": 0.9159227013587952, "logits/rejected": 0.9441806674003601, "logps/chosen": -2.115917444229126, "logps/rejected": -4.091799259185791, "loss": 0.5528, "nll_loss": 0.5357925891876221, "rewards/accuracies": 1.0, "rewards/chosen": -0.21159175038337708, "rewards/margins": 0.19758814573287964, "rewards/rejected": -0.4091799259185791, "step": 4043 }, { "epoch": 11.071868583162217, "grad_norm": 3.9128127098083496, "learning_rate": 4.4602739726027397e-07, "log_odds_chosen": 0.8438678979873657, "log_odds_ratio": -0.4331093430519104, "logits/chosen": 0.9953184127807617, "logits/rejected": 0.961358904838562, "logps/chosen": -2.1575846672058105, "logps/rejected": -2.911677837371826, "loss": 0.582, "nll_loss": 0.5387310981750488, "rewards/accuracies": 0.75, "rewards/chosen": -0.21575847268104553, "rewards/margins": 0.07540936022996902, "rewards/rejected": -0.29116782546043396, "step": 4044 }, { "epoch": 11.07460643394935, "grad_norm": 3.407863140106201, "learning_rate": 4.4589041095890407e-07, "log_odds_chosen": 3.071780204772949, "log_odds_ratio": -0.13112129271030426, "logits/chosen": 1.1928492784500122, "logits/rejected": 1.2183575630187988, "logps/chosen": -2.1171321868896484, "logps/rejected": -5.038127899169922, "loss": 0.6071, "nll_loss": 0.5940262675285339, "rewards/accuracies": 1.0, "rewards/chosen": -0.21171320974826813, "rewards/margins": 0.29209962487220764, "rewards/rejected": -0.5038127899169922, "step": 4045 }, { "epoch": 11.077344284736482, "grad_norm": 4.917338848114014, "learning_rate": 4.457534246575342e-07, "log_odds_chosen": 2.337380886077881, "log_odds_ratio": -0.3549295663833618, "logits/chosen": 0.8996273279190063, "logits/rejected": 0.9203490614891052, "logps/chosen": -2.3065133094787598, "logps/rejected": -4.573495864868164, "loss": 0.636, "nll_loss": 0.6005436778068542, "rewards/accuracies": 0.75, "rewards/chosen": -0.2306513637304306, "rewards/margins": 0.22669826447963715, "rewards/rejected": -0.45734959840774536, "step": 4046 }, { "epoch": 11.080082135523615, "grad_norm": 4.153078556060791, "learning_rate": 4.4561643835616437e-07, "log_odds_chosen": 3.2374274730682373, "log_odds_ratio": -0.1856824904680252, "logits/chosen": 1.0886945724487305, "logits/rejected": 1.1207064390182495, "logps/chosen": -1.6155588626861572, "logps/rejected": -4.54334831237793, "loss": 0.5665, "nll_loss": 0.5479310750961304, "rewards/accuracies": 1.0, "rewards/chosen": -0.16155588626861572, "rewards/margins": 0.29277893900871277, "rewards/rejected": -0.4543348550796509, "step": 4047 }, { "epoch": 11.082819986310746, "grad_norm": 3.669440746307373, "learning_rate": 4.4547945205479447e-07, "log_odds_chosen": 3.1239218711853027, "log_odds_ratio": -0.23709549009799957, "logits/chosen": 0.8366182446479797, "logits/rejected": 0.8243810534477234, "logps/chosen": -1.5567710399627686, "logps/rejected": -4.513423442840576, "loss": 0.5883, "nll_loss": 0.5646318197250366, "rewards/accuracies": 1.0, "rewards/chosen": -0.15567709505558014, "rewards/margins": 0.29566526412963867, "rewards/rejected": -0.4513423442840576, "step": 4048 }, { "epoch": 11.085557837097879, "grad_norm": 4.854149341583252, "learning_rate": 4.453424657534247e-07, "log_odds_chosen": 1.568334698677063, "log_odds_ratio": -0.42849916219711304, "logits/chosen": 0.9928022623062134, "logits/rejected": 1.0142520666122437, "logps/chosen": -2.4321043491363525, "logps/rejected": -3.831653118133545, "loss": 0.602, "nll_loss": 0.5591941475868225, "rewards/accuracies": 0.875, "rewards/chosen": -0.24321044981479645, "rewards/margins": 0.1399548500776291, "rewards/rejected": -0.3831653296947479, "step": 4049 }, { "epoch": 11.08829568788501, "grad_norm": 5.865650177001953, "learning_rate": 4.4520547945205477e-07, "log_odds_chosen": 1.9327175617218018, "log_odds_ratio": -0.671887218952179, "logits/chosen": 0.9458592534065247, "logits/rejected": 0.8895716667175293, "logps/chosen": -2.0351645946502686, "logps/rejected": -3.88716721534729, "loss": 0.7729, "nll_loss": 0.7057557702064514, "rewards/accuracies": 0.875, "rewards/chosen": -0.20351645350456238, "rewards/margins": 0.1852002590894699, "rewards/rejected": -0.3887166976928711, "step": 4050 }, { "epoch": 11.091033538672143, "grad_norm": 5.765559673309326, "learning_rate": 4.4506849315068487e-07, "log_odds_chosen": 1.4010343551635742, "log_odds_ratio": -0.44785043597221375, "logits/chosen": 0.9307522177696228, "logits/rejected": 0.8671173453330994, "logps/chosen": -2.3583269119262695, "logps/rejected": -3.679208278656006, "loss": 0.7451, "nll_loss": 0.7002655267715454, "rewards/accuracies": 0.875, "rewards/chosen": -0.23583269119262695, "rewards/margins": 0.13208813965320587, "rewards/rejected": -0.367920845746994, "step": 4051 }, { "epoch": 11.093771389459274, "grad_norm": 4.134527206420898, "learning_rate": 4.449315068493151e-07, "log_odds_chosen": 2.7641491889953613, "log_odds_ratio": -0.22292396426200867, "logits/chosen": 0.8402334451675415, "logits/rejected": 0.8336459398269653, "logps/chosen": -2.5104990005493164, "logps/rejected": -5.169658660888672, "loss": 0.6173, "nll_loss": 0.5950561761856079, "rewards/accuracies": 1.0, "rewards/chosen": -0.2510499060153961, "rewards/margins": 0.2659159302711487, "rewards/rejected": -0.5169658660888672, "step": 4052 }, { "epoch": 11.096509240246407, "grad_norm": 3.643017053604126, "learning_rate": 4.447945205479452e-07, "log_odds_chosen": 4.092827320098877, "log_odds_ratio": -0.1945095956325531, "logits/chosen": 1.026339054107666, "logits/rejected": 1.022071123123169, "logps/chosen": -2.2405786514282227, "logps/rejected": -6.241199970245361, "loss": 0.6457, "nll_loss": 0.6262649893760681, "rewards/accuracies": 0.875, "rewards/chosen": -0.2240578532218933, "rewards/margins": 0.40006211400032043, "rewards/rejected": -0.6241199374198914, "step": 4053 }, { "epoch": 11.099247091033538, "grad_norm": 2.979482650756836, "learning_rate": 4.4465753424657533e-07, "log_odds_chosen": 2.3529300689697266, "log_odds_ratio": -0.13876399397850037, "logits/chosen": 1.125981330871582, "logits/rejected": 1.0270037651062012, "logps/chosen": -1.392447590827942, "logps/rejected": -3.4851572513580322, "loss": 0.5394, "nll_loss": 0.5254814028739929, "rewards/accuracies": 1.0, "rewards/chosen": -0.13924476504325867, "rewards/margins": 0.20927096903324127, "rewards/rejected": -0.34851574897766113, "step": 4054 }, { "epoch": 11.101984941820671, "grad_norm": 5.024425029754639, "learning_rate": 4.4452054794520543e-07, "log_odds_chosen": 2.9102940559387207, "log_odds_ratio": -0.16660311818122864, "logits/chosen": 1.251184105873108, "logits/rejected": 1.225541591644287, "logps/chosen": -1.9656776189804077, "logps/rejected": -4.617307662963867, "loss": 0.5882, "nll_loss": 0.5715126991271973, "rewards/accuracies": 1.0, "rewards/chosen": -0.19656775891780853, "rewards/margins": 0.26516303420066833, "rewards/rejected": -0.4617307782173157, "step": 4055 }, { "epoch": 11.104722792607802, "grad_norm": 3.213122844696045, "learning_rate": 4.4438356164383563e-07, "log_odds_chosen": 3.3286631107330322, "log_odds_ratio": -0.11141063272953033, "logits/chosen": 1.0332248210906982, "logits/rejected": 1.0228238105773926, "logps/chosen": -2.046666145324707, "logps/rejected": -5.213430881500244, "loss": 0.5562, "nll_loss": 0.5451037287712097, "rewards/accuracies": 1.0, "rewards/chosen": -0.2046665996313095, "rewards/margins": 0.3166764974594116, "rewards/rejected": -0.5213431119918823, "step": 4056 }, { "epoch": 11.107460643394935, "grad_norm": 7.329147815704346, "learning_rate": 4.4424657534246573e-07, "log_odds_chosen": 2.2670905590057373, "log_odds_ratio": -0.329388290643692, "logits/chosen": 0.8745092153549194, "logits/rejected": 0.8352372646331787, "logps/chosen": -2.5524981021881104, "logps/rejected": -4.689916610717773, "loss": 0.6673, "nll_loss": 0.6344001889228821, "rewards/accuracies": 0.875, "rewards/chosen": -0.2552497982978821, "rewards/margins": 0.21374185383319855, "rewards/rejected": -0.46899163722991943, "step": 4057 }, { "epoch": 11.110198494182066, "grad_norm": 3.397473096847534, "learning_rate": 4.4410958904109583e-07, "log_odds_chosen": 3.242131233215332, "log_odds_ratio": -0.14366938173770905, "logits/chosen": 1.126130223274231, "logits/rejected": 1.161362886428833, "logps/chosen": -2.587892770767212, "logps/rejected": -5.660437107086182, "loss": 0.6085, "nll_loss": 0.5940939784049988, "rewards/accuracies": 1.0, "rewards/chosen": -0.2587892711162567, "rewards/margins": 0.30725446343421936, "rewards/rejected": -0.5660437345504761, "step": 4058 }, { "epoch": 11.1129363449692, "grad_norm": 3.8538577556610107, "learning_rate": 4.4397260273972604e-07, "log_odds_chosen": 3.034972667694092, "log_odds_ratio": -0.19236375391483307, "logits/chosen": 0.8221583962440491, "logits/rejected": 0.5942510366439819, "logps/chosen": -1.4295307397842407, "logps/rejected": -4.254230499267578, "loss": 0.5902, "nll_loss": 0.5709506869316101, "rewards/accuracies": 1.0, "rewards/chosen": -0.1429530680179596, "rewards/margins": 0.2824699878692627, "rewards/rejected": -0.4254230260848999, "step": 4059 }, { "epoch": 11.11567419575633, "grad_norm": 5.050456523895264, "learning_rate": 4.4383561643835613e-07, "log_odds_chosen": 0.5233545303344727, "log_odds_ratio": -0.6233153343200684, "logits/chosen": 0.9215101003646851, "logits/rejected": 0.8994172215461731, "logps/chosen": -2.444464683532715, "logps/rejected": -2.8645248413085938, "loss": 0.749, "nll_loss": 0.6866503953933716, "rewards/accuracies": 0.75, "rewards/chosen": -0.24444645643234253, "rewards/margins": 0.042006026953458786, "rewards/rejected": -0.2864524722099304, "step": 4060 }, { "epoch": 11.118412046543463, "grad_norm": 3.5856778621673584, "learning_rate": 4.436986301369863e-07, "log_odds_chosen": 3.497807502746582, "log_odds_ratio": -0.1682160496711731, "logits/chosen": 0.9496890902519226, "logits/rejected": 0.9872567057609558, "logps/chosen": -2.0559611320495605, "logps/rejected": -5.444376468658447, "loss": 0.7953, "nll_loss": 0.7784830927848816, "rewards/accuracies": 1.0, "rewards/chosen": -0.20559611916542053, "rewards/margins": 0.3388415575027466, "rewards/rejected": -0.5444377064704895, "step": 4061 }, { "epoch": 11.121149897330595, "grad_norm": 6.56121826171875, "learning_rate": 4.435616438356164e-07, "log_odds_chosen": 3.309368848800659, "log_odds_ratio": -0.5051957964897156, "logits/chosen": 1.3534609079360962, "logits/rejected": 1.4072470664978027, "logps/chosen": -2.696066379547119, "logps/rejected": -5.923907279968262, "loss": 0.6738, "nll_loss": 0.6232486367225647, "rewards/accuracies": 0.875, "rewards/chosen": -0.26960664987564087, "rewards/margins": 0.3227841556072235, "rewards/rejected": -0.592390775680542, "step": 4062 }, { "epoch": 11.123887748117728, "grad_norm": 3.3576602935791016, "learning_rate": 4.434246575342466e-07, "log_odds_chosen": 1.560640811920166, "log_odds_ratio": -0.27564680576324463, "logits/chosen": 1.0360456705093384, "logits/rejected": 0.99543297290802, "logps/chosen": -1.5127044916152954, "logps/rejected": -2.8990795612335205, "loss": 0.5531, "nll_loss": 0.5255751609802246, "rewards/accuracies": 0.875, "rewards/chosen": -0.15127046406269073, "rewards/margins": 0.138637512922287, "rewards/rejected": -0.28990796208381653, "step": 4063 }, { "epoch": 11.126625598904859, "grad_norm": 4.168731689453125, "learning_rate": 4.432876712328767e-07, "log_odds_chosen": 1.4157942533493042, "log_odds_ratio": -0.3718385100364685, "logits/chosen": 0.8252812623977661, "logits/rejected": 0.8016542792320251, "logps/chosen": -2.10872745513916, "logps/rejected": -3.428614616394043, "loss": 0.6096, "nll_loss": 0.572449803352356, "rewards/accuracies": 0.875, "rewards/chosen": -0.21087276935577393, "rewards/margins": 0.13198870420455933, "rewards/rejected": -0.34286144375801086, "step": 4064 }, { "epoch": 11.129363449691992, "grad_norm": 3.3567254543304443, "learning_rate": 4.431506849315068e-07, "log_odds_chosen": 2.648921489715576, "log_odds_ratio": -0.21651582419872284, "logits/chosen": 0.6957886219024658, "logits/rejected": 0.5744999647140503, "logps/chosen": -1.7293438911437988, "logps/rejected": -4.217304229736328, "loss": 0.7179, "nll_loss": 0.6962883472442627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1729343831539154, "rewards/margins": 0.2487960010766983, "rewards/rejected": -0.4217303991317749, "step": 4065 }, { "epoch": 11.132101300479125, "grad_norm": 3.5905020236968994, "learning_rate": 4.43013698630137e-07, "log_odds_chosen": 2.859813690185547, "log_odds_ratio": -0.22305826842784882, "logits/chosen": 1.077303409576416, "logits/rejected": 1.063910961151123, "logps/chosen": -1.6730927228927612, "logps/rejected": -4.354916572570801, "loss": 0.66, "nll_loss": 0.637653648853302, "rewards/accuracies": 0.875, "rewards/chosen": -0.16730928421020508, "rewards/margins": 0.2681823968887329, "rewards/rejected": -0.4354917109012604, "step": 4066 }, { "epoch": 11.134839151266256, "grad_norm": 4.380580902099609, "learning_rate": 4.428767123287671e-07, "log_odds_chosen": 2.9862821102142334, "log_odds_ratio": -0.24104449152946472, "logits/chosen": 1.1082736253738403, "logits/rejected": 1.14082932472229, "logps/chosen": -2.333808183670044, "logps/rejected": -5.192625045776367, "loss": 0.5598, "nll_loss": 0.5356921553611755, "rewards/accuracies": 1.0, "rewards/chosen": -0.23338082432746887, "rewards/margins": 0.28588172793388367, "rewards/rejected": -0.5192625522613525, "step": 4067 }, { "epoch": 11.137577002053389, "grad_norm": 3.2060375213623047, "learning_rate": 4.4273972602739725e-07, "log_odds_chosen": 3.0564956665039062, "log_odds_ratio": -0.12172749638557434, "logits/chosen": 1.1228896379470825, "logits/rejected": 1.1564100980758667, "logps/chosen": -2.160545587539673, "logps/rejected": -5.066276550292969, "loss": 0.5941, "nll_loss": 0.5818994045257568, "rewards/accuracies": 1.0, "rewards/chosen": -0.21605455875396729, "rewards/margins": 0.2905730903148651, "rewards/rejected": -0.50662761926651, "step": 4068 }, { "epoch": 11.14031485284052, "grad_norm": 3.3068456649780273, "learning_rate": 4.4260273972602734e-07, "log_odds_chosen": 2.9585230350494385, "log_odds_ratio": -0.14739999175071716, "logits/chosen": 1.1092416048049927, "logits/rejected": 1.0841425657272339, "logps/chosen": -1.626659870147705, "logps/rejected": -4.347286224365234, "loss": 0.4853, "nll_loss": 0.47060516476631165, "rewards/accuracies": 1.0, "rewards/chosen": -0.16266599297523499, "rewards/margins": 0.27206262946128845, "rewards/rejected": -0.43472862243652344, "step": 4069 }, { "epoch": 11.143052703627653, "grad_norm": 4.051688194274902, "learning_rate": 4.4246575342465755e-07, "log_odds_chosen": 1.5206501483917236, "log_odds_ratio": -0.23603036999702454, "logits/chosen": 1.0576725006103516, "logits/rejected": 0.9837518930435181, "logps/chosen": -1.8640468120574951, "logps/rejected": -3.179990768432617, "loss": 0.5863, "nll_loss": 0.5627341866493225, "rewards/accuracies": 1.0, "rewards/chosen": -0.18640469014644623, "rewards/margins": 0.13159438967704773, "rewards/rejected": -0.31799906492233276, "step": 4070 }, { "epoch": 11.145790554414784, "grad_norm": 3.4514079093933105, "learning_rate": 4.4232876712328765e-07, "log_odds_chosen": 4.803802967071533, "log_odds_ratio": -0.10077319294214249, "logits/chosen": 1.2280293703079224, "logits/rejected": 1.29264497756958, "logps/chosen": -2.5788028240203857, "logps/rejected": -7.116460800170898, "loss": 0.588, "nll_loss": 0.5778908729553223, "rewards/accuracies": 1.0, "rewards/chosen": -0.2578802704811096, "rewards/margins": 0.45376577973365784, "rewards/rejected": -0.7116460800170898, "step": 4071 }, { "epoch": 11.148528405201917, "grad_norm": 4.112438201904297, "learning_rate": 4.4219178082191775e-07, "log_odds_chosen": 3.198673725128174, "log_odds_ratio": -0.16473890841007233, "logits/chosen": 0.9976122379302979, "logits/rejected": 0.9757595062255859, "logps/chosen": -2.3173413276672363, "logps/rejected": -5.351503849029541, "loss": 0.632, "nll_loss": 0.6155510544776917, "rewards/accuracies": 0.875, "rewards/chosen": -0.23173412680625916, "rewards/margins": 0.30341625213623047, "rewards/rejected": -0.535150408744812, "step": 4072 }, { "epoch": 11.151266255989048, "grad_norm": 3.6344797611236572, "learning_rate": 4.4205479452054795e-07, "log_odds_chosen": 2.3147635459899902, "log_odds_ratio": -0.20673173666000366, "logits/chosen": 1.1621173620224, "logits/rejected": 1.1803867816925049, "logps/chosen": -1.7118548154830933, "logps/rejected": -3.8775317668914795, "loss": 0.5248, "nll_loss": 0.5041171908378601, "rewards/accuracies": 1.0, "rewards/chosen": -0.1711854785680771, "rewards/margins": 0.21656769514083862, "rewards/rejected": -0.3877531886100769, "step": 4073 }, { "epoch": 11.154004106776181, "grad_norm": 3.100851535797119, "learning_rate": 4.4191780821917805e-07, "log_odds_chosen": 3.5726118087768555, "log_odds_ratio": -0.17980429530143738, "logits/chosen": 1.0551626682281494, "logits/rejected": 1.0450459718704224, "logps/chosen": -1.760164737701416, "logps/rejected": -5.147964000701904, "loss": 0.6361, "nll_loss": 0.6181198358535767, "rewards/accuracies": 1.0, "rewards/chosen": -0.1760164499282837, "rewards/margins": 0.3387799561023712, "rewards/rejected": -0.5147963762283325, "step": 4074 }, { "epoch": 11.156741957563312, "grad_norm": 6.881083011627197, "learning_rate": 4.417808219178082e-07, "log_odds_chosen": 2.8681631088256836, "log_odds_ratio": -0.12158405035734177, "logits/chosen": 1.1931716203689575, "logits/rejected": 1.1470361948013306, "logps/chosen": -2.8493878841400146, "logps/rejected": -5.6125688552856445, "loss": 0.6495, "nll_loss": 0.6373615264892578, "rewards/accuracies": 1.0, "rewards/chosen": -0.2849388122558594, "rewards/margins": 0.27631813287734985, "rewards/rejected": -0.5612569451332092, "step": 4075 }, { "epoch": 11.159479808350445, "grad_norm": 5.876870155334473, "learning_rate": 4.416438356164383e-07, "log_odds_chosen": 1.1330522298812866, "log_odds_ratio": -0.6260554194450378, "logits/chosen": 0.9715434312820435, "logits/rejected": 0.926798939704895, "logps/chosen": -1.9546433687210083, "logps/rejected": -2.896308422088623, "loss": 0.5146, "nll_loss": 0.45198071002960205, "rewards/accuracies": 0.75, "rewards/chosen": -0.1954643428325653, "rewards/margins": 0.09416648745536804, "rewards/rejected": -0.28963083028793335, "step": 4076 }, { "epoch": 11.162217659137577, "grad_norm": 3.597027063369751, "learning_rate": 4.415068493150685e-07, "log_odds_chosen": 1.5176483392715454, "log_odds_ratio": -0.22596994042396545, "logits/chosen": 0.9276239275932312, "logits/rejected": 0.8185963034629822, "logps/chosen": -1.1853874921798706, "logps/rejected": -2.4367523193359375, "loss": 0.4994, "nll_loss": 0.47683489322662354, "rewards/accuracies": 1.0, "rewards/chosen": -0.1185387521982193, "rewards/margins": 0.12513647973537445, "rewards/rejected": -0.24367523193359375, "step": 4077 }, { "epoch": 11.16495550992471, "grad_norm": 4.396689414978027, "learning_rate": 4.413698630136986e-07, "log_odds_chosen": 1.9498322010040283, "log_odds_ratio": -0.3585825562477112, "logits/chosen": 0.8246438503265381, "logits/rejected": 0.8351221680641174, "logps/chosen": -1.530670166015625, "logps/rejected": -3.3405542373657227, "loss": 0.5575, "nll_loss": 0.5215933322906494, "rewards/accuracies": 1.0, "rewards/chosen": -0.15306702256202698, "rewards/margins": 0.1809883713722229, "rewards/rejected": -0.3340553641319275, "step": 4078 }, { "epoch": 11.16769336071184, "grad_norm": 6.541515350341797, "learning_rate": 4.412328767123287e-07, "log_odds_chosen": 2.801663637161255, "log_odds_ratio": -0.404374897480011, "logits/chosen": 0.8757115602493286, "logits/rejected": 0.7682710289955139, "logps/chosen": -1.9038236141204834, "logps/rejected": -4.368861675262451, "loss": 0.5631, "nll_loss": 0.5227081775665283, "rewards/accuracies": 0.875, "rewards/chosen": -0.19038236141204834, "rewards/margins": 0.2465038299560547, "rewards/rejected": -0.436886191368103, "step": 4079 }, { "epoch": 11.170431211498974, "grad_norm": 3.2335145473480225, "learning_rate": 4.410958904109589e-07, "log_odds_chosen": 2.101489782333374, "log_odds_ratio": -0.20013441145420074, "logits/chosen": 0.9522786736488342, "logits/rejected": 0.9388278126716614, "logps/chosen": -2.1646952629089355, "logps/rejected": -4.131303787231445, "loss": 0.5464, "nll_loss": 0.5263423323631287, "rewards/accuracies": 1.0, "rewards/chosen": -0.21646952629089355, "rewards/margins": 0.1966608464717865, "rewards/rejected": -0.41313040256500244, "step": 4080 }, { "epoch": 11.173169062286105, "grad_norm": 5.873353958129883, "learning_rate": 4.40958904109589e-07, "log_odds_chosen": 3.259042501449585, "log_odds_ratio": -0.24093462526798248, "logits/chosen": 1.1978976726531982, "logits/rejected": 1.146773099899292, "logps/chosen": -2.453115463256836, "logps/rejected": -5.493716239929199, "loss": 0.713, "nll_loss": 0.6888976097106934, "rewards/accuracies": 0.875, "rewards/chosen": -0.24531155824661255, "rewards/margins": 0.30406010150909424, "rewards/rejected": -0.5493716597557068, "step": 4081 }, { "epoch": 11.175906913073238, "grad_norm": 3.6356968879699707, "learning_rate": 4.4082191780821916e-07, "log_odds_chosen": 2.520606756210327, "log_odds_ratio": -0.24779339134693146, "logits/chosen": 0.8565049171447754, "logits/rejected": 0.8847334980964661, "logps/chosen": -1.6589884757995605, "logps/rejected": -3.9720077514648438, "loss": 0.5853, "nll_loss": 0.5604763627052307, "rewards/accuracies": 0.875, "rewards/chosen": -0.16589884459972382, "rewards/margins": 0.2313019335269928, "rewards/rejected": -0.3972007632255554, "step": 4082 }, { "epoch": 11.178644763860369, "grad_norm": 6.266324996948242, "learning_rate": 4.406849315068493e-07, "log_odds_chosen": 1.068418264389038, "log_odds_ratio": -0.6607516407966614, "logits/chosen": 1.1790560483932495, "logits/rejected": 1.2402987480163574, "logps/chosen": -3.104926586151123, "logps/rejected": -4.094629764556885, "loss": 0.7388, "nll_loss": 0.6726768016815186, "rewards/accuracies": 0.5, "rewards/chosen": -0.3104926645755768, "rewards/margins": 0.09897030889987946, "rewards/rejected": -0.40946295857429504, "step": 4083 }, { "epoch": 11.181382614647502, "grad_norm": 3.3768725395202637, "learning_rate": 4.4054794520547947e-07, "log_odds_chosen": 3.355518102645874, "log_odds_ratio": -0.09578125178813934, "logits/chosen": 1.1002079248428345, "logits/rejected": 1.0822954177856445, "logps/chosen": -2.041266441345215, "logps/rejected": -5.253676414489746, "loss": 0.5562, "nll_loss": 0.5466235280036926, "rewards/accuracies": 1.0, "rewards/chosen": -0.20412664115428925, "rewards/margins": 0.32124102115631104, "rewards/rejected": -0.5253676176071167, "step": 4084 }, { "epoch": 11.184120465434633, "grad_norm": 3.5976390838623047, "learning_rate": 4.4041095890410957e-07, "log_odds_chosen": 2.7904810905456543, "log_odds_ratio": -0.19717484712600708, "logits/chosen": 0.9330796599388123, "logits/rejected": 0.8854146599769592, "logps/chosen": -1.7423325777053833, "logps/rejected": -4.375516891479492, "loss": 0.5265, "nll_loss": 0.5067686438560486, "rewards/accuracies": 1.0, "rewards/chosen": -0.17423325777053833, "rewards/margins": 0.26331838965415955, "rewards/rejected": -0.43755167722702026, "step": 4085 }, { "epoch": 11.186858316221766, "grad_norm": 3.3899567127227783, "learning_rate": 4.4027397260273967e-07, "log_odds_chosen": 1.7908774614334106, "log_odds_ratio": -0.2546420991420746, "logits/chosen": 0.9332393407821655, "logits/rejected": 0.90718674659729, "logps/chosen": -1.9809119701385498, "logps/rejected": -3.626692771911621, "loss": 0.5547, "nll_loss": 0.5292221307754517, "rewards/accuracies": 0.875, "rewards/chosen": -0.19809117913246155, "rewards/margins": 0.16457809507846832, "rewards/rejected": -0.36266928911209106, "step": 4086 }, { "epoch": 11.189596167008897, "grad_norm": 6.334984302520752, "learning_rate": 4.4013698630136987e-07, "log_odds_chosen": 0.6919799447059631, "log_odds_ratio": -0.6216132640838623, "logits/chosen": 0.761227011680603, "logits/rejected": 0.7328726649284363, "logps/chosen": -2.4936790466308594, "logps/rejected": -3.0924875736236572, "loss": 0.6396, "nll_loss": 0.5774521827697754, "rewards/accuracies": 0.625, "rewards/chosen": -0.24936789274215698, "rewards/margins": 0.05988086014986038, "rewards/rejected": -0.30924877524375916, "step": 4087 }, { "epoch": 11.19233401779603, "grad_norm": 3.3796756267547607, "learning_rate": 4.3999999999999997e-07, "log_odds_chosen": 1.7277581691741943, "log_odds_ratio": -0.35608378052711487, "logits/chosen": 1.1647101640701294, "logits/rejected": 1.2137242555618286, "logps/chosen": -1.8967812061309814, "logps/rejected": -3.5261571407318115, "loss": 0.5776, "nll_loss": 0.5420224070549011, "rewards/accuracies": 0.75, "rewards/chosen": -0.1896781176328659, "rewards/margins": 0.16293758153915405, "rewards/rejected": -0.35261571407318115, "step": 4088 }, { "epoch": 11.195071868583161, "grad_norm": 3.52925443649292, "learning_rate": 4.398630136986301e-07, "log_odds_chosen": 2.601686477661133, "log_odds_ratio": -0.21880900859832764, "logits/chosen": 0.9968109726905823, "logits/rejected": 0.9812549948692322, "logps/chosen": -2.1669347286224365, "logps/rejected": -4.51777458190918, "loss": 0.647, "nll_loss": 0.6250806450843811, "rewards/accuracies": 0.875, "rewards/chosen": -0.21669349074363708, "rewards/margins": 0.23508398234844208, "rewards/rejected": -0.45177745819091797, "step": 4089 }, { "epoch": 11.197809719370294, "grad_norm": 3.8425395488739014, "learning_rate": 4.397260273972603e-07, "log_odds_chosen": 2.6283295154571533, "log_odds_ratio": -0.2375149130821228, "logits/chosen": 1.0508824586868286, "logits/rejected": 1.0238690376281738, "logps/chosen": -2.025252103805542, "logps/rejected": -4.541620254516602, "loss": 0.6402, "nll_loss": 0.6164250373840332, "rewards/accuracies": 1.0, "rewards/chosen": -0.20252522826194763, "rewards/margins": 0.2516368329524994, "rewards/rejected": -0.454162061214447, "step": 4090 }, { "epoch": 11.200547570157426, "grad_norm": 3.8853657245635986, "learning_rate": 4.395890410958904e-07, "log_odds_chosen": 1.084044098854065, "log_odds_ratio": -0.4061746597290039, "logits/chosen": 0.7590329647064209, "logits/rejected": 0.6984192728996277, "logps/chosen": -1.8238027095794678, "logps/rejected": -2.7593653202056885, "loss": 0.5685, "nll_loss": 0.5279020667076111, "rewards/accuracies": 0.875, "rewards/chosen": -0.18238025903701782, "rewards/margins": 0.09355627745389938, "rewards/rejected": -0.2759365439414978, "step": 4091 }, { "epoch": 11.203285420944558, "grad_norm": 5.436094760894775, "learning_rate": 4.394520547945205e-07, "log_odds_chosen": 2.625763416290283, "log_odds_ratio": -0.25195109844207764, "logits/chosen": 0.9652483463287354, "logits/rejected": 1.0426164865493774, "logps/chosen": -2.5626943111419678, "logps/rejected": -5.087294578552246, "loss": 0.6265, "nll_loss": 0.6013363599777222, "rewards/accuracies": 1.0, "rewards/chosen": -0.2562694549560547, "rewards/margins": 0.2524600028991699, "rewards/rejected": -0.5087294578552246, "step": 4092 }, { "epoch": 11.206023271731691, "grad_norm": 4.619128704071045, "learning_rate": 4.393150684931506e-07, "log_odds_chosen": 2.5742533206939697, "log_odds_ratio": -0.38099995255470276, "logits/chosen": 0.9318322539329529, "logits/rejected": 0.9691330790519714, "logps/chosen": -1.9420921802520752, "logps/rejected": -4.376823425292969, "loss": 0.6452, "nll_loss": 0.6070757508277893, "rewards/accuracies": 0.875, "rewards/chosen": -0.19420921802520752, "rewards/margins": 0.2434731125831604, "rewards/rejected": -0.4376823306083679, "step": 4093 }, { "epoch": 11.208761122518823, "grad_norm": 4.787086009979248, "learning_rate": 4.3917808219178083e-07, "log_odds_chosen": 1.7164462804794312, "log_odds_ratio": -0.3014146089553833, "logits/chosen": 0.9281084537506104, "logits/rejected": 0.8843448162078857, "logps/chosen": -2.942521095275879, "logps/rejected": -4.5384392738342285, "loss": 0.5472, "nll_loss": 0.5170266032218933, "rewards/accuracies": 1.0, "rewards/chosen": -0.29425209760665894, "rewards/margins": 0.15959185361862183, "rewards/rejected": -0.45384398102760315, "step": 4094 }, { "epoch": 11.211498973305956, "grad_norm": 4.3601884841918945, "learning_rate": 4.3904109589041093e-07, "log_odds_chosen": 2.2196037769317627, "log_odds_ratio": -0.22666585445404053, "logits/chosen": 0.9270574450492859, "logits/rejected": 0.9700204133987427, "logps/chosen": -2.0339927673339844, "logps/rejected": -4.058121204376221, "loss": 0.55, "nll_loss": 0.5273064970970154, "rewards/accuracies": 0.875, "rewards/chosen": -0.20339928567409515, "rewards/margins": 0.20241284370422363, "rewards/rejected": -0.4058121144771576, "step": 4095 }, { "epoch": 11.214236824093087, "grad_norm": 3.1163785457611084, "learning_rate": 4.389041095890411e-07, "log_odds_chosen": 2.510852813720703, "log_odds_ratio": -0.21409189701080322, "logits/chosen": 0.9278289675712585, "logits/rejected": 0.9512646198272705, "logps/chosen": -1.613201379776001, "logps/rejected": -3.9496898651123047, "loss": 0.4831, "nll_loss": 0.4616925120353699, "rewards/accuracies": 1.0, "rewards/chosen": -0.16132013499736786, "rewards/margins": 0.2336488515138626, "rewards/rejected": -0.39496898651123047, "step": 4096 }, { "epoch": 11.21697467488022, "grad_norm": 3.714384078979492, "learning_rate": 4.3876712328767123e-07, "log_odds_chosen": 2.154528856277466, "log_odds_ratio": -0.28019654750823975, "logits/chosen": 1.0983613729476929, "logits/rejected": 1.1509590148925781, "logps/chosen": -1.807370662689209, "logps/rejected": -3.7743918895721436, "loss": 0.5459, "nll_loss": 0.5179191827774048, "rewards/accuracies": 1.0, "rewards/chosen": -0.18073707818984985, "rewards/margins": 0.19670212268829346, "rewards/rejected": -0.3774392306804657, "step": 4097 }, { "epoch": 11.219712525667351, "grad_norm": 3.7085301876068115, "learning_rate": 4.3863013698630133e-07, "log_odds_chosen": 2.538804054260254, "log_odds_ratio": -0.17119115591049194, "logits/chosen": 1.0955036878585815, "logits/rejected": 1.1183183193206787, "logps/chosen": -1.9169447422027588, "logps/rejected": -4.277723789215088, "loss": 0.5961, "nll_loss": 0.5790219306945801, "rewards/accuracies": 1.0, "rewards/chosen": -0.1916944831609726, "rewards/margins": 0.2360779196023941, "rewards/rejected": -0.4277723729610443, "step": 4098 }, { "epoch": 11.222450376454484, "grad_norm": 4.968678951263428, "learning_rate": 4.384931506849315e-07, "log_odds_chosen": 2.9807450771331787, "log_odds_ratio": -0.18832671642303467, "logits/chosen": 1.1743594408035278, "logits/rejected": 1.2227253913879395, "logps/chosen": -2.478407859802246, "logps/rejected": -5.323271751403809, "loss": 0.53, "nll_loss": 0.5111514329910278, "rewards/accuracies": 1.0, "rewards/chosen": -0.2478407770395279, "rewards/margins": 0.28448644280433655, "rewards/rejected": -0.5323271751403809, "step": 4099 }, { "epoch": 11.225188227241615, "grad_norm": 4.4626288414001465, "learning_rate": 4.383561643835616e-07, "log_odds_chosen": 2.0781209468841553, "log_odds_ratio": -0.30916160345077515, "logits/chosen": 1.0932341814041138, "logits/rejected": 1.090114712715149, "logps/chosen": -2.156865358352661, "logps/rejected": -4.142393112182617, "loss": 0.5352, "nll_loss": 0.504315197467804, "rewards/accuracies": 1.0, "rewards/chosen": -0.21568652987480164, "rewards/margins": 0.19855278730392456, "rewards/rejected": -0.4142393171787262, "step": 4100 }, { "epoch": 11.227926078028748, "grad_norm": 4.06662130355835, "learning_rate": 4.382191780821918e-07, "log_odds_chosen": 2.627889633178711, "log_odds_ratio": -0.3243481516838074, "logits/chosen": 0.6330674886703491, "logits/rejected": 0.5328561663627625, "logps/chosen": -2.154926061630249, "logps/rejected": -4.623536586761475, "loss": 0.6071, "nll_loss": 0.5746704339981079, "rewards/accuracies": 0.875, "rewards/chosen": -0.2154925912618637, "rewards/margins": 0.2468610554933548, "rewards/rejected": -0.4623536467552185, "step": 4101 }, { "epoch": 11.23066392881588, "grad_norm": 3.8530778884887695, "learning_rate": 4.380821917808219e-07, "log_odds_chosen": 3.18629789352417, "log_odds_ratio": -0.08791498839855194, "logits/chosen": 1.2459545135498047, "logits/rejected": 1.2684094905853271, "logps/chosen": -2.0019516944885254, "logps/rejected": -4.876437664031982, "loss": 0.5484, "nll_loss": 0.5395674705505371, "rewards/accuracies": 1.0, "rewards/chosen": -0.20019516348838806, "rewards/margins": 0.28744858503341675, "rewards/rejected": -0.4876437783241272, "step": 4102 }, { "epoch": 11.233401779603012, "grad_norm": 4.3872175216674805, "learning_rate": 4.3794520547945204e-07, "log_odds_chosen": 1.2289866209030151, "log_odds_ratio": -0.3479195237159729, "logits/chosen": 1.207533597946167, "logits/rejected": 1.1997405290603638, "logps/chosen": -2.3301210403442383, "logps/rejected": -3.459329128265381, "loss": 0.5874, "nll_loss": 0.5526044964790344, "rewards/accuracies": 0.875, "rewards/chosen": -0.2330121099948883, "rewards/margins": 0.11292082071304321, "rewards/rejected": -0.34593290090560913, "step": 4103 }, { "epoch": 11.236139630390143, "grad_norm": 4.164426326751709, "learning_rate": 4.378082191780822e-07, "log_odds_chosen": 3.416985034942627, "log_odds_ratio": -0.2661597430706024, "logits/chosen": 1.0875941514968872, "logits/rejected": 1.1468935012817383, "logps/chosen": -2.2268905639648438, "logps/rejected": -5.358468055725098, "loss": 0.6831, "nll_loss": 0.6564877033233643, "rewards/accuracies": 0.75, "rewards/chosen": -0.22268909215927124, "rewards/margins": 0.31315773725509644, "rewards/rejected": -0.5358468294143677, "step": 4104 }, { "epoch": 11.238877481177276, "grad_norm": 7.04826021194458, "learning_rate": 4.376712328767123e-07, "log_odds_chosen": 3.075244188308716, "log_odds_ratio": -0.2224493771791458, "logits/chosen": 1.0007874965667725, "logits/rejected": 0.9434317946434021, "logps/chosen": -1.8202890157699585, "logps/rejected": -4.712668418884277, "loss": 0.5889, "nll_loss": 0.5666752457618713, "rewards/accuracies": 1.0, "rewards/chosen": -0.18202891945838928, "rewards/margins": 0.2892378866672516, "rewards/rejected": -0.4712667763233185, "step": 4105 }, { "epoch": 11.241615331964407, "grad_norm": 3.3790109157562256, "learning_rate": 4.3753424657534244e-07, "log_odds_chosen": 2.642831325531006, "log_odds_ratio": -0.17775295674800873, "logits/chosen": 0.968100905418396, "logits/rejected": 0.9313938617706299, "logps/chosen": -1.314160943031311, "logps/rejected": -3.610168933868408, "loss": 0.4703, "nll_loss": 0.45250293612480164, "rewards/accuracies": 1.0, "rewards/chosen": -0.13141609728336334, "rewards/margins": 0.22960080206394196, "rewards/rejected": -0.3610168695449829, "step": 4106 }, { "epoch": 11.24435318275154, "grad_norm": 4.2501912117004395, "learning_rate": 4.3739726027397254e-07, "log_odds_chosen": 2.727079153060913, "log_odds_ratio": -0.1903235912322998, "logits/chosen": 0.915635347366333, "logits/rejected": 0.9503089189529419, "logps/chosen": -2.6475131511688232, "logps/rejected": -5.279793739318848, "loss": 0.6254, "nll_loss": 0.6063247323036194, "rewards/accuracies": 1.0, "rewards/chosen": -0.2647513151168823, "rewards/margins": 0.26322805881500244, "rewards/rejected": -0.5279793739318848, "step": 4107 }, { "epoch": 11.247091033538672, "grad_norm": 7.137565612792969, "learning_rate": 4.3726027397260275e-07, "log_odds_chosen": 2.5864293575286865, "log_odds_ratio": -0.41291555762290955, "logits/chosen": 1.2316651344299316, "logits/rejected": 1.2347089052200317, "logps/chosen": -2.305172920227051, "logps/rejected": -4.744271278381348, "loss": 0.7192, "nll_loss": 0.6778782606124878, "rewards/accuracies": 0.875, "rewards/chosen": -0.23051729798316956, "rewards/margins": 0.2439098209142685, "rewards/rejected": -0.47442713379859924, "step": 4108 }, { "epoch": 11.249828884325805, "grad_norm": 5.7846879959106445, "learning_rate": 4.3712328767123285e-07, "log_odds_chosen": 2.9396162033081055, "log_odds_ratio": -0.22780095040798187, "logits/chosen": 1.3044617176055908, "logits/rejected": 1.3630571365356445, "logps/chosen": -2.391087532043457, "logps/rejected": -5.148098468780518, "loss": 0.5473, "nll_loss": 0.5245354771614075, "rewards/accuracies": 0.875, "rewards/chosen": -0.23910874128341675, "rewards/margins": 0.275701105594635, "rewards/rejected": -0.5148098468780518, "step": 4109 }, { "epoch": 11.252566735112936, "grad_norm": 3.408546209335327, "learning_rate": 4.36986301369863e-07, "log_odds_chosen": 2.0727241039276123, "log_odds_ratio": -0.2499474138021469, "logits/chosen": 1.007840871810913, "logits/rejected": 1.0602741241455078, "logps/chosen": -2.1237285137176514, "logps/rejected": -4.059344291687012, "loss": 0.5467, "nll_loss": 0.5217052698135376, "rewards/accuracies": 0.875, "rewards/chosen": -0.21237283945083618, "rewards/margins": 0.1935615837574005, "rewards/rejected": -0.4059344530105591, "step": 4110 }, { "epoch": 11.255304585900069, "grad_norm": 4.174562931060791, "learning_rate": 4.3684931506849315e-07, "log_odds_chosen": 1.2776130437850952, "log_odds_ratio": -0.3932444751262665, "logits/chosen": 0.9726039171218872, "logits/rejected": 0.8232917189598083, "logps/chosen": -1.5073602199554443, "logps/rejected": -2.643653154373169, "loss": 0.5524, "nll_loss": 0.5130818486213684, "rewards/accuracies": 0.875, "rewards/chosen": -0.1507360339164734, "rewards/margins": 0.11362926661968231, "rewards/rejected": -0.2643652856349945, "step": 4111 }, { "epoch": 11.2580424366872, "grad_norm": 3.88222074508667, "learning_rate": 4.3671232876712325e-07, "log_odds_chosen": 1.9644362926483154, "log_odds_ratio": -0.2608792185783386, "logits/chosen": 1.1398261785507202, "logits/rejected": 1.1741249561309814, "logps/chosen": -2.6603198051452637, "logps/rejected": -4.578776836395264, "loss": 0.5886, "nll_loss": 0.5625158548355103, "rewards/accuracies": 0.875, "rewards/chosen": -0.26603201031684875, "rewards/margins": 0.19184568524360657, "rewards/rejected": -0.4578776955604553, "step": 4112 }, { "epoch": 11.260780287474333, "grad_norm": 4.937462329864502, "learning_rate": 4.365753424657534e-07, "log_odds_chosen": 4.233916282653809, "log_odds_ratio": -0.3963612914085388, "logits/chosen": 1.2813780307769775, "logits/rejected": 1.258562445640564, "logps/chosen": -2.3767926692962646, "logps/rejected": -6.475618362426758, "loss": 0.6249, "nll_loss": 0.5853038430213928, "rewards/accuracies": 0.875, "rewards/chosen": -0.23767927289009094, "rewards/margins": 0.4098825454711914, "rewards/rejected": -0.64756178855896, "step": 4113 }, { "epoch": 11.263518138261464, "grad_norm": 9.364923477172852, "learning_rate": 4.364383561643835e-07, "log_odds_chosen": 0.19964554905891418, "log_odds_ratio": -0.8773365616798401, "logits/chosen": 0.9970749616622925, "logits/rejected": 0.9271653890609741, "logps/chosen": -2.8913445472717285, "logps/rejected": -3.05513072013855, "loss": 0.7607, "nll_loss": 0.6729491949081421, "rewards/accuracies": 0.625, "rewards/chosen": -0.2891344428062439, "rewards/margins": 0.01637864299118519, "rewards/rejected": -0.30551308393478394, "step": 4114 }, { "epoch": 11.266255989048597, "grad_norm": 3.9335803985595703, "learning_rate": 4.363013698630137e-07, "log_odds_chosen": 4.607728004455566, "log_odds_ratio": -0.23711514472961426, "logits/chosen": 0.9360650777816772, "logits/rejected": 0.9787664413452148, "logps/chosen": -1.4321058988571167, "logps/rejected": -5.628200054168701, "loss": 0.5577, "nll_loss": 0.5339699387550354, "rewards/accuracies": 0.75, "rewards/chosen": -0.14321058988571167, "rewards/margins": 0.4196094274520874, "rewards/rejected": -0.5628199577331543, "step": 4115 }, { "epoch": 11.268993839835728, "grad_norm": 3.3656766414642334, "learning_rate": 4.361643835616438e-07, "log_odds_chosen": 3.3846497535705566, "log_odds_ratio": -0.20145055651664734, "logits/chosen": 0.8421158790588379, "logits/rejected": 0.8192322850227356, "logps/chosen": -1.4256818294525146, "logps/rejected": -4.603472709655762, "loss": 0.5686, "nll_loss": 0.5484209060668945, "rewards/accuracies": 1.0, "rewards/chosen": -0.1425681859254837, "rewards/margins": 0.3177791237831116, "rewards/rejected": -0.4603472948074341, "step": 4116 }, { "epoch": 11.271731690622861, "grad_norm": 3.579225540161133, "learning_rate": 4.3602739726027396e-07, "log_odds_chosen": 2.9815926551818848, "log_odds_ratio": -0.21590644121170044, "logits/chosen": 0.8728358745574951, "logits/rejected": 0.7878085374832153, "logps/chosen": -1.754494071006775, "logps/rejected": -4.534082412719727, "loss": 0.5173, "nll_loss": 0.49574241042137146, "rewards/accuracies": 0.875, "rewards/chosen": -0.1754494160413742, "rewards/margins": 0.27795886993408203, "rewards/rejected": -0.45340830087661743, "step": 4117 }, { "epoch": 11.274469541409992, "grad_norm": 4.924702167510986, "learning_rate": 4.358904109589041e-07, "log_odds_chosen": 2.6203091144561768, "log_odds_ratio": -0.26675111055374146, "logits/chosen": 0.8620714545249939, "logits/rejected": 0.8803874254226685, "logps/chosen": -1.7344045639038086, "logps/rejected": -4.171795845031738, "loss": 0.6565, "nll_loss": 0.6297829151153564, "rewards/accuracies": 1.0, "rewards/chosen": -0.17344048619270325, "rewards/margins": 0.24373911321163177, "rewards/rejected": -0.41717958450317383, "step": 4118 }, { "epoch": 11.277207392197125, "grad_norm": 6.191999912261963, "learning_rate": 4.357534246575342e-07, "log_odds_chosen": 2.121711492538452, "log_odds_ratio": -0.34696337580680847, "logits/chosen": 0.9209591150283813, "logits/rejected": 0.9071081280708313, "logps/chosen": -2.827507972717285, "logps/rejected": -4.834268093109131, "loss": 0.737, "nll_loss": 0.7023113965988159, "rewards/accuracies": 0.875, "rewards/chosen": -0.28275084495544434, "rewards/margins": 0.20067597925662994, "rewards/rejected": -0.4834267497062683, "step": 4119 }, { "epoch": 11.279945242984258, "grad_norm": 8.435423851013184, "learning_rate": 4.3561643835616436e-07, "log_odds_chosen": 1.4555351734161377, "log_odds_ratio": -0.571648120880127, "logits/chosen": 0.7607698440551758, "logits/rejected": 0.7368737459182739, "logps/chosen": -2.5165247917175293, "logps/rejected": -3.853907823562622, "loss": 0.6556, "nll_loss": 0.5984370708465576, "rewards/accuracies": 0.75, "rewards/chosen": -0.25165247917175293, "rewards/margins": 0.13373829424381256, "rewards/rejected": -0.3853907585144043, "step": 4120 }, { "epoch": 11.28268309377139, "grad_norm": 5.943355083465576, "learning_rate": 4.354794520547945e-07, "log_odds_chosen": 1.8405334949493408, "log_odds_ratio": -0.22261551022529602, "logits/chosen": 1.2652175426483154, "logits/rejected": 1.1644608974456787, "logps/chosen": -1.9395771026611328, "logps/rejected": -3.586244821548462, "loss": 0.5803, "nll_loss": 0.5580534338951111, "rewards/accuracies": 0.875, "rewards/chosen": -0.19395768642425537, "rewards/margins": 0.1646667718887329, "rewards/rejected": -0.3586244583129883, "step": 4121 }, { "epoch": 11.285420944558522, "grad_norm": 3.4533066749572754, "learning_rate": 4.3534246575342466e-07, "log_odds_chosen": 4.342792987823486, "log_odds_ratio": -0.06164904311299324, "logits/chosen": 0.9727694988250732, "logits/rejected": 1.012459635734558, "logps/chosen": -1.9703888893127441, "logps/rejected": -6.096770763397217, "loss": 0.6135, "nll_loss": 0.6073570251464844, "rewards/accuracies": 1.0, "rewards/chosen": -0.19703888893127441, "rewards/margins": 0.41263821721076965, "rewards/rejected": -0.6096771359443665, "step": 4122 }, { "epoch": 11.288158795345653, "grad_norm": 3.8714871406555176, "learning_rate": 4.3520547945205476e-07, "log_odds_chosen": 1.5591906309127808, "log_odds_ratio": -0.21606026589870453, "logits/chosen": 0.7753190398216248, "logits/rejected": 0.7213513851165771, "logps/chosen": -1.657950520515442, "logps/rejected": -3.0307531356811523, "loss": 0.6325, "nll_loss": 0.6108618378639221, "rewards/accuracies": 1.0, "rewards/chosen": -0.16579505801200867, "rewards/margins": 0.13728027045726776, "rewards/rejected": -0.3030753433704376, "step": 4123 }, { "epoch": 11.290896646132786, "grad_norm": 4.099714756011963, "learning_rate": 4.350684931506849e-07, "log_odds_chosen": 2.1818032264709473, "log_odds_ratio": -0.1918993592262268, "logits/chosen": 1.1711721420288086, "logits/rejected": 1.1779487133026123, "logps/chosen": -2.0441174507141113, "logps/rejected": -4.052136421203613, "loss": 0.7484, "nll_loss": 0.7292097806930542, "rewards/accuracies": 1.0, "rewards/chosen": -0.20441177487373352, "rewards/margins": 0.20080187916755676, "rewards/rejected": -0.4052136242389679, "step": 4124 }, { "epoch": 11.293634496919918, "grad_norm": 3.69692063331604, "learning_rate": 4.3493150684931507e-07, "log_odds_chosen": 1.7014102935791016, "log_odds_ratio": -0.21561044454574585, "logits/chosen": 1.0882242918014526, "logits/rejected": 1.1051216125488281, "logps/chosen": -1.5682390928268433, "logps/rejected": -3.0789504051208496, "loss": 0.5252, "nll_loss": 0.5036022067070007, "rewards/accuracies": 1.0, "rewards/chosen": -0.15682391822338104, "rewards/margins": 0.15107111632823944, "rewards/rejected": -0.3078950345516205, "step": 4125 }, { "epoch": 11.29637234770705, "grad_norm": 3.7561419010162354, "learning_rate": 4.3479452054794517e-07, "log_odds_chosen": 1.1246635913848877, "log_odds_ratio": -0.3665902614593506, "logits/chosen": 0.9819945693016052, "logits/rejected": 1.0151244401931763, "logps/chosen": -1.8682458400726318, "logps/rejected": -2.839400053024292, "loss": 0.5375, "nll_loss": 0.5007981657981873, "rewards/accuracies": 1.0, "rewards/chosen": -0.18682458996772766, "rewards/margins": 0.09711544215679169, "rewards/rejected": -0.28394001722335815, "step": 4126 }, { "epoch": 11.299110198494182, "grad_norm": 6.937610149383545, "learning_rate": 4.346575342465753e-07, "log_odds_chosen": 2.0237879753112793, "log_odds_ratio": -0.34075266122817993, "logits/chosen": 1.0202828645706177, "logits/rejected": 0.9340428113937378, "logps/chosen": -2.3971633911132812, "logps/rejected": -4.277400016784668, "loss": 0.6994, "nll_loss": 0.6653002500534058, "rewards/accuracies": 0.875, "rewards/chosen": -0.23971638083457947, "rewards/margins": 0.1880236268043518, "rewards/rejected": -0.4277400076389313, "step": 4127 }, { "epoch": 11.301848049281315, "grad_norm": 4.170833110809326, "learning_rate": 4.3452054794520547e-07, "log_odds_chosen": 2.6145081520080566, "log_odds_ratio": -0.2692634165287018, "logits/chosen": 1.1270880699157715, "logits/rejected": 1.0505733489990234, "logps/chosen": -1.5587716102600098, "logps/rejected": -3.964442253112793, "loss": 0.5823, "nll_loss": 0.5554065108299255, "rewards/accuracies": 1.0, "rewards/chosen": -0.15587715804576874, "rewards/margins": 0.24056705832481384, "rewards/rejected": -0.3964442014694214, "step": 4128 }, { "epoch": 11.304585900068446, "grad_norm": 4.628417015075684, "learning_rate": 4.343835616438356e-07, "log_odds_chosen": 1.2598272562026978, "log_odds_ratio": -0.4112747013568878, "logits/chosen": 0.9585331082344055, "logits/rejected": 0.8527917265892029, "logps/chosen": -1.5477017164230347, "logps/rejected": -2.666006565093994, "loss": 0.5764, "nll_loss": 0.5352434515953064, "rewards/accuracies": 0.875, "rewards/chosen": -0.154770165681839, "rewards/margins": 0.11183049529790878, "rewards/rejected": -0.26660066843032837, "step": 4129 }, { "epoch": 11.307323750855579, "grad_norm": 7.922233581542969, "learning_rate": 4.342465753424657e-07, "log_odds_chosen": 1.2936877012252808, "log_odds_ratio": -0.6367734670639038, "logits/chosen": 0.6593674421310425, "logits/rejected": 0.549562931060791, "logps/chosen": -2.6531190872192383, "logps/rejected": -3.8158504962921143, "loss": 0.6167, "nll_loss": 0.5530033111572266, "rewards/accuracies": 0.875, "rewards/chosen": -0.2653118968009949, "rewards/margins": 0.11627315729856491, "rewards/rejected": -0.3815850615501404, "step": 4130 }, { "epoch": 11.31006160164271, "grad_norm": 4.1913886070251465, "learning_rate": 4.3410958904109587e-07, "log_odds_chosen": 2.264915943145752, "log_odds_ratio": -0.17460571229457855, "logits/chosen": 1.0202665328979492, "logits/rejected": 1.0420124530792236, "logps/chosen": -3.181481122970581, "logps/rejected": -5.389223575592041, "loss": 0.7132, "nll_loss": 0.6957169771194458, "rewards/accuracies": 1.0, "rewards/chosen": -0.31814810633659363, "rewards/margins": 0.22077423334121704, "rewards/rejected": -0.5389223694801331, "step": 4131 }, { "epoch": 11.312799452429843, "grad_norm": 3.929313898086548, "learning_rate": 4.33972602739726e-07, "log_odds_chosen": 1.2566946744918823, "log_odds_ratio": -0.3031880259513855, "logits/chosen": 0.9673798084259033, "logits/rejected": 0.9159787893295288, "logps/chosen": -1.5303194522857666, "logps/rejected": -2.6179845333099365, "loss": 0.4747, "nll_loss": 0.44442877173423767, "rewards/accuracies": 1.0, "rewards/chosen": -0.15303194522857666, "rewards/margins": 0.10876649618148804, "rewards/rejected": -0.2617984712123871, "step": 4132 }, { "epoch": 11.315537303216974, "grad_norm": 4.133102893829346, "learning_rate": 4.338356164383561e-07, "log_odds_chosen": 1.5612311363220215, "log_odds_ratio": -0.2775506377220154, "logits/chosen": 0.9712883234024048, "logits/rejected": 0.8744462728500366, "logps/chosen": -1.5802948474884033, "logps/rejected": -2.875885486602783, "loss": 0.4767, "nll_loss": 0.44892093539237976, "rewards/accuracies": 1.0, "rewards/chosen": -0.1580294966697693, "rewards/margins": 0.12955906987190247, "rewards/rejected": -0.28758853673934937, "step": 4133 }, { "epoch": 11.318275154004107, "grad_norm": 3.2826714515686035, "learning_rate": 4.336986301369863e-07, "log_odds_chosen": 3.6012043952941895, "log_odds_ratio": -0.23420734703540802, "logits/chosen": 1.082229733467102, "logits/rejected": 1.049047827720642, "logps/chosen": -1.4569642543792725, "logps/rejected": -4.851729393005371, "loss": 0.5718, "nll_loss": 0.5483721494674683, "rewards/accuracies": 1.0, "rewards/chosen": -0.14569643139839172, "rewards/margins": 0.3394765257835388, "rewards/rejected": -0.48517298698425293, "step": 4134 }, { "epoch": 11.321013004791238, "grad_norm": 3.421722888946533, "learning_rate": 4.3356164383561643e-07, "log_odds_chosen": 3.814271926879883, "log_odds_ratio": -0.15905877947807312, "logits/chosen": 1.0650761127471924, "logits/rejected": 0.9705560207366943, "logps/chosen": -1.495546817779541, "logps/rejected": -5.046239376068115, "loss": 0.5547, "nll_loss": 0.5387716293334961, "rewards/accuracies": 1.0, "rewards/chosen": -0.14955468475818634, "rewards/margins": 0.35506922006607056, "rewards/rejected": -0.5046238899230957, "step": 4135 }, { "epoch": 11.323750855578371, "grad_norm": 4.002130508422852, "learning_rate": 4.334246575342466e-07, "log_odds_chosen": 1.8421849012374878, "log_odds_ratio": -0.46258339285850525, "logits/chosen": 0.6871366500854492, "logits/rejected": 0.7069815993309021, "logps/chosen": -1.777843952178955, "logps/rejected": -3.3641014099121094, "loss": 0.5661, "nll_loss": 0.5198888182640076, "rewards/accuracies": 0.875, "rewards/chosen": -0.17778438329696655, "rewards/margins": 0.1586257517337799, "rewards/rejected": -0.33641016483306885, "step": 4136 }, { "epoch": 11.326488706365502, "grad_norm": 3.545680522918701, "learning_rate": 4.332876712328767e-07, "log_odds_chosen": 2.7540063858032227, "log_odds_ratio": -0.19426865875720978, "logits/chosen": 0.9578527212142944, "logits/rejected": 0.9277864694595337, "logps/chosen": -1.7747057676315308, "logps/rejected": -4.316220760345459, "loss": 0.5591, "nll_loss": 0.5396392345428467, "rewards/accuracies": 1.0, "rewards/chosen": -0.17747057974338531, "rewards/margins": 0.25415152311325073, "rewards/rejected": -0.43162214756011963, "step": 4137 }, { "epoch": 11.329226557152635, "grad_norm": 6.476957321166992, "learning_rate": 4.331506849315068e-07, "log_odds_chosen": 3.402621269226074, "log_odds_ratio": -0.37418800592422485, "logits/chosen": 0.6804078221321106, "logits/rejected": 0.607719898223877, "logps/chosen": -2.138928174972534, "logps/rejected": -5.378194808959961, "loss": 0.717, "nll_loss": 0.6795801520347595, "rewards/accuracies": 0.875, "rewards/chosen": -0.2138928323984146, "rewards/margins": 0.3239266276359558, "rewards/rejected": -0.537819504737854, "step": 4138 }, { "epoch": 11.331964407939767, "grad_norm": 3.725329875946045, "learning_rate": 4.33013698630137e-07, "log_odds_chosen": 3.4159159660339355, "log_odds_ratio": -0.3471676707267761, "logits/chosen": 0.9763854742050171, "logits/rejected": 0.973713219165802, "logps/chosen": -2.3990492820739746, "logps/rejected": -5.729053974151611, "loss": 0.7264, "nll_loss": 0.6916594505310059, "rewards/accuracies": 0.875, "rewards/chosen": -0.23990492522716522, "rewards/margins": 0.3330004811286926, "rewards/rejected": -0.572905421257019, "step": 4139 }, { "epoch": 11.3347022587269, "grad_norm": 3.790865659713745, "learning_rate": 4.328767123287671e-07, "log_odds_chosen": 1.1189913749694824, "log_odds_ratio": -0.34685391187667847, "logits/chosen": 1.0235474109649658, "logits/rejected": 0.979046106338501, "logps/chosen": -1.6069345474243164, "logps/rejected": -2.53707218170166, "loss": 0.5117, "nll_loss": 0.4770023822784424, "rewards/accuracies": 1.0, "rewards/chosen": -0.1606934517621994, "rewards/margins": 0.09301374852657318, "rewards/rejected": -0.2537072002887726, "step": 4140 }, { "epoch": 11.33744010951403, "grad_norm": 6.852682590484619, "learning_rate": 4.3273972602739724e-07, "log_odds_chosen": 2.671718120574951, "log_odds_ratio": -0.24926717579364777, "logits/chosen": 1.068381667137146, "logits/rejected": 1.1311496496200562, "logps/chosen": -2.6736817359924316, "logps/rejected": -5.159304141998291, "loss": 0.6553, "nll_loss": 0.6303803324699402, "rewards/accuracies": 0.875, "rewards/chosen": -0.2673681676387787, "rewards/margins": 0.24856224656105042, "rewards/rejected": -0.5159304141998291, "step": 4141 }, { "epoch": 11.340177960301164, "grad_norm": 3.5134875774383545, "learning_rate": 4.326027397260274e-07, "log_odds_chosen": 4.532315254211426, "log_odds_ratio": -0.11685532331466675, "logits/chosen": 1.2753357887268066, "logits/rejected": 1.294693946838379, "logps/chosen": -1.810929775238037, "logps/rejected": -6.151037693023682, "loss": 0.6266, "nll_loss": 0.6148884296417236, "rewards/accuracies": 1.0, "rewards/chosen": -0.18109296262264252, "rewards/margins": 0.4340108036994934, "rewards/rejected": -0.6151037812232971, "step": 4142 }, { "epoch": 11.342915811088295, "grad_norm": 3.390994071960449, "learning_rate": 4.3246575342465754e-07, "log_odds_chosen": 1.8950122594833374, "log_odds_ratio": -0.2515859007835388, "logits/chosen": 1.0213021039962769, "logits/rejected": 1.0268824100494385, "logps/chosen": -1.6619793176651, "logps/rejected": -3.403367519378662, "loss": 0.5019, "nll_loss": 0.4767691493034363, "rewards/accuracies": 1.0, "rewards/chosen": -0.16619792580604553, "rewards/margins": 0.1741388440132141, "rewards/rejected": -0.34033674001693726, "step": 4143 }, { "epoch": 11.345653661875428, "grad_norm": 3.8352468013763428, "learning_rate": 4.3232876712328764e-07, "log_odds_chosen": 2.8609671592712402, "log_odds_ratio": -0.15607509016990662, "logits/chosen": 0.9021428227424622, "logits/rejected": 0.8947343826293945, "logps/chosen": -2.023489236831665, "logps/rejected": -4.7285284996032715, "loss": 0.7785, "nll_loss": 0.7628549337387085, "rewards/accuracies": 1.0, "rewards/chosen": -0.20234893262386322, "rewards/margins": 0.2705039381980896, "rewards/rejected": -0.4728528559207916, "step": 4144 }, { "epoch": 11.34839151266256, "grad_norm": 3.482152223587036, "learning_rate": 4.3219178082191774e-07, "log_odds_chosen": 2.5807676315307617, "log_odds_ratio": -0.19756564497947693, "logits/chosen": 0.8729903697967529, "logits/rejected": 0.840804934501648, "logps/chosen": -1.5390138626098633, "logps/rejected": -3.897731304168701, "loss": 0.5675, "nll_loss": 0.5477005839347839, "rewards/accuracies": 1.0, "rewards/chosen": -0.1539013683795929, "rewards/margins": 0.23587177693843842, "rewards/rejected": -0.3897731602191925, "step": 4145 }, { "epoch": 11.351129363449692, "grad_norm": 4.308889389038086, "learning_rate": 4.3205479452054794e-07, "log_odds_chosen": 1.2329401969909668, "log_odds_ratio": -0.4075024425983429, "logits/chosen": 0.9207710027694702, "logits/rejected": 0.9074904918670654, "logps/chosen": -2.016146183013916, "logps/rejected": -3.1537976264953613, "loss": 0.5003, "nll_loss": 0.45952773094177246, "rewards/accuracies": 0.875, "rewards/chosen": -0.2016146332025528, "rewards/margins": 0.11376510560512543, "rewards/rejected": -0.3153797388076782, "step": 4146 }, { "epoch": 11.353867214236825, "grad_norm": 9.603839874267578, "learning_rate": 4.3191780821917804e-07, "log_odds_chosen": 3.4119319915771484, "log_odds_ratio": -0.229107066988945, "logits/chosen": 0.9873074293136597, "logits/rejected": 1.000640869140625, "logps/chosen": -3.1321845054626465, "logps/rejected": -6.430552959442139, "loss": 0.7411, "nll_loss": 0.7181476354598999, "rewards/accuracies": 0.875, "rewards/chosen": -0.31321844458580017, "rewards/margins": 0.3298368752002716, "rewards/rejected": -0.643055260181427, "step": 4147 }, { "epoch": 11.356605065023956, "grad_norm": 3.711981773376465, "learning_rate": 4.317808219178082e-07, "log_odds_chosen": 2.4459362030029297, "log_odds_ratio": -0.18044111132621765, "logits/chosen": 1.2103806734085083, "logits/rejected": 1.2726856470108032, "logps/chosen": -2.5741240978240967, "logps/rejected": -4.927368640899658, "loss": 0.6356, "nll_loss": 0.617538332939148, "rewards/accuracies": 0.875, "rewards/chosen": -0.2574124038219452, "rewards/margins": 0.23532447218894958, "rewards/rejected": -0.4927368760108948, "step": 4148 }, { "epoch": 11.359342915811089, "grad_norm": 3.7826151847839355, "learning_rate": 4.3164383561643835e-07, "log_odds_chosen": 1.2284471988677979, "log_odds_ratio": -0.3311430811882019, "logits/chosen": 0.7205986380577087, "logits/rejected": 0.6966219544410706, "logps/chosen": -2.117255210876465, "logps/rejected": -3.205775737762451, "loss": 0.5808, "nll_loss": 0.5476493835449219, "rewards/accuracies": 0.875, "rewards/chosen": -0.21172553300857544, "rewards/margins": 0.10885202884674072, "rewards/rejected": -0.32057756185531616, "step": 4149 }, { "epoch": 11.36208076659822, "grad_norm": 7.863154411315918, "learning_rate": 4.315068493150685e-07, "log_odds_chosen": 1.532249927520752, "log_odds_ratio": -0.6049163937568665, "logits/chosen": 0.9263162016868591, "logits/rejected": 0.8551148176193237, "logps/chosen": -2.529204845428467, "logps/rejected": -3.971165180206299, "loss": 0.6678, "nll_loss": 0.6073191165924072, "rewards/accuracies": 0.75, "rewards/chosen": -0.2529204785823822, "rewards/margins": 0.14419609308242798, "rewards/rejected": -0.3971165716648102, "step": 4150 }, { "epoch": 11.364818617385353, "grad_norm": 4.508450031280518, "learning_rate": 4.313698630136986e-07, "log_odds_chosen": 1.7728688716888428, "log_odds_ratio": -0.2965918183326721, "logits/chosen": 1.3967697620391846, "logits/rejected": 1.4169044494628906, "logps/chosen": -1.696946144104004, "logps/rejected": -3.2453911304473877, "loss": 0.4864, "nll_loss": 0.45678508281707764, "rewards/accuracies": 0.875, "rewards/chosen": -0.16969460248947144, "rewards/margins": 0.1548445224761963, "rewards/rejected": -0.3245391249656677, "step": 4151 }, { "epoch": 11.367556468172484, "grad_norm": 4.277490139007568, "learning_rate": 4.3123287671232875e-07, "log_odds_chosen": 2.9143130779266357, "log_odds_ratio": -0.12171109020709991, "logits/chosen": 1.2047462463378906, "logits/rejected": 1.1409300565719604, "logps/chosen": -1.869930624961853, "logps/rejected": -4.562812805175781, "loss": 0.5872, "nll_loss": 0.5750343799591064, "rewards/accuracies": 1.0, "rewards/chosen": -0.1869930624961853, "rewards/margins": 0.26928818225860596, "rewards/rejected": -0.45628124475479126, "step": 4152 }, { "epoch": 11.370294318959617, "grad_norm": 3.9412012100219727, "learning_rate": 4.310958904109589e-07, "log_odds_chosen": 1.8813096284866333, "log_odds_ratio": -0.3235272467136383, "logits/chosen": 0.9624446034431458, "logits/rejected": 0.8743953704833984, "logps/chosen": -1.6416555643081665, "logps/rejected": -3.34447979927063, "loss": 0.5218, "nll_loss": 0.48942291736602783, "rewards/accuracies": 0.875, "rewards/chosen": -0.16416554152965546, "rewards/margins": 0.17028245329856873, "rewards/rejected": -0.334447979927063, "step": 4153 }, { "epoch": 11.373032169746748, "grad_norm": 7.041513442993164, "learning_rate": 4.30958904109589e-07, "log_odds_chosen": 0.9729495048522949, "log_odds_ratio": -0.3526627719402313, "logits/chosen": 0.952684223651886, "logits/rejected": 0.8812840580940247, "logps/chosen": -2.5364251136779785, "logps/rejected": -3.4196369647979736, "loss": 0.6411, "nll_loss": 0.6058756709098816, "rewards/accuracies": 1.0, "rewards/chosen": -0.2536424994468689, "rewards/margins": 0.08832120150327682, "rewards/rejected": -0.3419637084007263, "step": 4154 }, { "epoch": 11.375770020533881, "grad_norm": 5.85205078125, "learning_rate": 4.3082191780821915e-07, "log_odds_chosen": 1.052445650100708, "log_odds_ratio": -0.5633420348167419, "logits/chosen": 1.0918943881988525, "logits/rejected": 1.0485572814941406, "logps/chosen": -2.2170777320861816, "logps/rejected": -3.2110819816589355, "loss": 0.7333, "nll_loss": 0.6769970655441284, "rewards/accuracies": 0.75, "rewards/chosen": -0.2217077761888504, "rewards/margins": 0.09940041601657867, "rewards/rejected": -0.3211081922054291, "step": 4155 }, { "epoch": 11.378507871321013, "grad_norm": 3.850461721420288, "learning_rate": 4.306849315068493e-07, "log_odds_chosen": 1.9691706895828247, "log_odds_ratio": -0.20205166935920715, "logits/chosen": 1.0217976570129395, "logits/rejected": 1.0797348022460938, "logps/chosen": -1.8953280448913574, "logps/rejected": -3.7030816078186035, "loss": 0.5659, "nll_loss": 0.5457432866096497, "rewards/accuracies": 1.0, "rewards/chosen": -0.1895328015089035, "rewards/margins": 0.18077537417411804, "rewards/rejected": -0.37030816078186035, "step": 4156 }, { "epoch": 11.381245722108146, "grad_norm": 4.6028218269348145, "learning_rate": 4.3054794520547946e-07, "log_odds_chosen": 2.3705694675445557, "log_odds_ratio": -0.26791778206825256, "logits/chosen": 1.0443793535232544, "logits/rejected": 1.068267583847046, "logps/chosen": -2.49544358253479, "logps/rejected": -4.765095233917236, "loss": 0.7208, "nll_loss": 0.6939675211906433, "rewards/accuracies": 0.875, "rewards/chosen": -0.24954435229301453, "rewards/margins": 0.22696515917778015, "rewards/rejected": -0.4765095114707947, "step": 4157 }, { "epoch": 11.383983572895277, "grad_norm": 3.330817937850952, "learning_rate": 4.3041095890410956e-07, "log_odds_chosen": 2.15718150138855, "log_odds_ratio": -0.26352596282958984, "logits/chosen": 1.0128521919250488, "logits/rejected": 0.9900277256965637, "logps/chosen": -1.6761572360992432, "logps/rejected": -3.6847333908081055, "loss": 0.5559, "nll_loss": 0.5295459032058716, "rewards/accuracies": 1.0, "rewards/chosen": -0.16761571168899536, "rewards/margins": 0.20085762441158295, "rewards/rejected": -0.3684733211994171, "step": 4158 }, { "epoch": 11.38672142368241, "grad_norm": 6.568319797515869, "learning_rate": 4.302739726027397e-07, "log_odds_chosen": 1.4620366096496582, "log_odds_ratio": -0.5160412192344666, "logits/chosen": 0.9808650016784668, "logits/rejected": 0.8906028270721436, "logps/chosen": -2.310965061187744, "logps/rejected": -3.6877858638763428, "loss": 0.6117, "nll_loss": 0.5600481033325195, "rewards/accuracies": 0.75, "rewards/chosen": -0.23109650611877441, "rewards/margins": 0.13768209517002106, "rewards/rejected": -0.3687785863876343, "step": 4159 }, { "epoch": 11.38945927446954, "grad_norm": 3.8093109130859375, "learning_rate": 4.3013698630136986e-07, "log_odds_chosen": 3.5319764614105225, "log_odds_ratio": -0.2568064332008362, "logits/chosen": 1.3042150735855103, "logits/rejected": 1.3459458351135254, "logps/chosen": -2.615973472595215, "logps/rejected": -5.9774603843688965, "loss": 0.6331, "nll_loss": 0.6074401140213013, "rewards/accuracies": 0.875, "rewards/chosen": -0.2615973949432373, "rewards/margins": 0.3361486792564392, "rewards/rejected": -0.5977460145950317, "step": 4160 }, { "epoch": 11.392197125256674, "grad_norm": 3.6048922538757324, "learning_rate": 4.2999999999999996e-07, "log_odds_chosen": 2.2256579399108887, "log_odds_ratio": -0.30396193265914917, "logits/chosen": 0.8742960691452026, "logits/rejected": 0.8697216510772705, "logps/chosen": -1.8289463520050049, "logps/rejected": -3.788747549057007, "loss": 0.5661, "nll_loss": 0.5357330441474915, "rewards/accuracies": 1.0, "rewards/chosen": -0.18289463222026825, "rewards/margins": 0.19598010182380676, "rewards/rejected": -0.3788747787475586, "step": 4161 }, { "epoch": 11.394934976043805, "grad_norm": 3.4520483016967773, "learning_rate": 4.298630136986301e-07, "log_odds_chosen": 3.4622507095336914, "log_odds_ratio": -0.19532662630081177, "logits/chosen": 0.9684255123138428, "logits/rejected": 0.984199047088623, "logps/chosen": -2.0935497283935547, "logps/rejected": -5.4074859619140625, "loss": 0.6337, "nll_loss": 0.6141420602798462, "rewards/accuracies": 1.0, "rewards/chosen": -0.20935499668121338, "rewards/margins": 0.3313935697078705, "rewards/rejected": -0.5407485961914062, "step": 4162 }, { "epoch": 11.397672826830938, "grad_norm": 3.3870081901550293, "learning_rate": 4.2972602739726026e-07, "log_odds_chosen": 3.566758871078491, "log_odds_ratio": -0.15421326458454132, "logits/chosen": 1.1209869384765625, "logits/rejected": 1.1584464311599731, "logps/chosen": -1.8287646770477295, "logps/rejected": -5.221826553344727, "loss": 0.5618, "nll_loss": 0.5463972091674805, "rewards/accuracies": 0.875, "rewards/chosen": -0.18287646770477295, "rewards/margins": 0.33930617570877075, "rewards/rejected": -0.5221826434135437, "step": 4163 }, { "epoch": 11.40041067761807, "grad_norm": 3.494879722595215, "learning_rate": 4.295890410958904e-07, "log_odds_chosen": 2.529951572418213, "log_odds_ratio": -0.16355407238006592, "logits/chosen": 1.1199111938476562, "logits/rejected": 1.1288299560546875, "logps/chosen": -1.8518192768096924, "logps/rejected": -4.2034382820129395, "loss": 0.5212, "nll_loss": 0.5048457384109497, "rewards/accuracies": 0.875, "rewards/chosen": -0.18518194556236267, "rewards/margins": 0.2351619154214859, "rewards/rejected": -0.420343816280365, "step": 4164 }, { "epoch": 11.403148528405202, "grad_norm": 4.982388973236084, "learning_rate": 4.294520547945205e-07, "log_odds_chosen": 2.2654595375061035, "log_odds_ratio": -0.26369810104370117, "logits/chosen": 1.141210913658142, "logits/rejected": 1.1281709671020508, "logps/chosen": -2.301650047302246, "logps/rejected": -4.460880756378174, "loss": 0.7091, "nll_loss": 0.682769238948822, "rewards/accuracies": 1.0, "rewards/chosen": -0.2301650196313858, "rewards/margins": 0.2159230262041092, "rewards/rejected": -0.446088045835495, "step": 4165 }, { "epoch": 11.405886379192333, "grad_norm": 4.505725383758545, "learning_rate": 4.2931506849315067e-07, "log_odds_chosen": 2.880366563796997, "log_odds_ratio": -0.17858950793743134, "logits/chosen": 1.0773191452026367, "logits/rejected": 1.1032263040542603, "logps/chosen": -2.1034884452819824, "logps/rejected": -4.790300369262695, "loss": 0.5265, "nll_loss": 0.5086022615432739, "rewards/accuracies": 1.0, "rewards/chosen": -0.21034882962703705, "rewards/margins": 0.2686811685562134, "rewards/rejected": -0.479030042886734, "step": 4166 }, { "epoch": 11.408624229979466, "grad_norm": 5.703822612762451, "learning_rate": 4.291780821917808e-07, "log_odds_chosen": 1.7415733337402344, "log_odds_ratio": -0.2618057429790497, "logits/chosen": 0.966791033744812, "logits/rejected": 0.9900832176208496, "logps/chosen": -2.7265379428863525, "logps/rejected": -4.318170547485352, "loss": 0.5995, "nll_loss": 0.5733156204223633, "rewards/accuracies": 0.875, "rewards/chosen": -0.2726537883281708, "rewards/margins": 0.15916329622268677, "rewards/rejected": -0.43181705474853516, "step": 4167 }, { "epoch": 11.411362080766597, "grad_norm": 4.301316738128662, "learning_rate": 4.290410958904109e-07, "log_odds_chosen": 3.326254367828369, "log_odds_ratio": -0.20980282127857208, "logits/chosen": 0.9617115259170532, "logits/rejected": 0.9929165244102478, "logps/chosen": -2.891225814819336, "logps/rejected": -6.072209358215332, "loss": 0.5159, "nll_loss": 0.4949079751968384, "rewards/accuracies": 0.875, "rewards/chosen": -0.2891225814819336, "rewards/margins": 0.3180983364582062, "rewards/rejected": -0.6072209477424622, "step": 4168 }, { "epoch": 11.41409993155373, "grad_norm": 6.678948402404785, "learning_rate": 4.2890410958904107e-07, "log_odds_chosen": 1.9876691102981567, "log_odds_ratio": -0.504738986492157, "logits/chosen": 1.0932915210723877, "logits/rejected": 1.1062339544296265, "logps/chosen": -2.097273349761963, "logps/rejected": -3.88725209236145, "loss": 0.579, "nll_loss": 0.5284993052482605, "rewards/accuracies": 0.875, "rewards/chosen": -0.20972736179828644, "rewards/margins": 0.17899787425994873, "rewards/rejected": -0.388725221157074, "step": 4169 }, { "epoch": 11.416837782340863, "grad_norm": 3.8637681007385254, "learning_rate": 4.287671232876712e-07, "log_odds_chosen": 2.0024893283843994, "log_odds_ratio": -0.29868993163108826, "logits/chosen": 0.8856756687164307, "logits/rejected": 0.7901738882064819, "logps/chosen": -2.4692678451538086, "logps/rejected": -4.374105930328369, "loss": 0.6138, "nll_loss": 0.5839244723320007, "rewards/accuracies": 1.0, "rewards/chosen": -0.24692676961421967, "rewards/margins": 0.19048386812210083, "rewards/rejected": -0.4374106526374817, "step": 4170 }, { "epoch": 11.419575633127995, "grad_norm": 4.178426265716553, "learning_rate": 4.286301369863014e-07, "log_odds_chosen": 2.1283788681030273, "log_odds_ratio": -0.2996044456958771, "logits/chosen": 1.2217203378677368, "logits/rejected": 1.2611788511276245, "logps/chosen": -2.40179443359375, "logps/rejected": -4.472954750061035, "loss": 0.6217, "nll_loss": 0.5917108654975891, "rewards/accuracies": 0.875, "rewards/chosen": -0.24017944931983948, "rewards/margins": 0.207116037607193, "rewards/rejected": -0.44729548692703247, "step": 4171 }, { "epoch": 11.422313483915127, "grad_norm": 3.3460826873779297, "learning_rate": 4.2849315068493147e-07, "log_odds_chosen": 2.5048155784606934, "log_odds_ratio": -0.14780494570732117, "logits/chosen": 0.9435727596282959, "logits/rejected": 0.9633171558380127, "logps/chosen": -2.141894817352295, "logps/rejected": -4.429104804992676, "loss": 0.5454, "nll_loss": 0.5306037068367004, "rewards/accuracies": 1.0, "rewards/chosen": -0.21418949961662292, "rewards/margins": 0.2287209928035736, "rewards/rejected": -0.44291049242019653, "step": 4172 }, { "epoch": 11.425051334702259, "grad_norm": 4.627471446990967, "learning_rate": 4.283561643835616e-07, "log_odds_chosen": 2.6031625270843506, "log_odds_ratio": -0.1693534553050995, "logits/chosen": 1.1293480396270752, "logits/rejected": 1.1624925136566162, "logps/chosen": -2.375743865966797, "logps/rejected": -4.790088653564453, "loss": 0.579, "nll_loss": 0.5620290040969849, "rewards/accuracies": 1.0, "rewards/chosen": -0.23757439851760864, "rewards/margins": 0.2414345145225525, "rewards/rejected": -0.47900888323783875, "step": 4173 }, { "epoch": 11.427789185489392, "grad_norm": 7.742999076843262, "learning_rate": 4.282191780821918e-07, "log_odds_chosen": 2.712832450866699, "log_odds_ratio": -0.6455856561660767, "logits/chosen": 0.7919365167617798, "logits/rejected": 0.7777845859527588, "logps/chosen": -2.6281328201293945, "logps/rejected": -5.070517539978027, "loss": 0.6669, "nll_loss": 0.6023646593093872, "rewards/accuracies": 0.75, "rewards/chosen": -0.2628132998943329, "rewards/margins": 0.2442384660243988, "rewards/rejected": -0.5070517063140869, "step": 4174 }, { "epoch": 11.430527036276523, "grad_norm": 3.2440128326416016, "learning_rate": 4.280821917808219e-07, "log_odds_chosen": 4.087453842163086, "log_odds_ratio": -0.06214278191328049, "logits/chosen": 1.2846417427062988, "logits/rejected": 1.3378815650939941, "logps/chosen": -2.2494592666625977, "logps/rejected": -6.177186965942383, "loss": 0.6697, "nll_loss": 0.663513720035553, "rewards/accuracies": 1.0, "rewards/chosen": -0.22494593262672424, "rewards/margins": 0.3927728235721588, "rewards/rejected": -0.6177186965942383, "step": 4175 }, { "epoch": 11.433264887063656, "grad_norm": 10.959885597229004, "learning_rate": 4.2794520547945203e-07, "log_odds_chosen": 2.0699801445007324, "log_odds_ratio": -0.38612037897109985, "logits/chosen": 0.9508488178253174, "logits/rejected": 0.9784406423568726, "logps/chosen": -1.8514386415481567, "logps/rejected": -3.6942005157470703, "loss": 0.598, "nll_loss": 0.559403121471405, "rewards/accuracies": 0.875, "rewards/chosen": -0.1851438581943512, "rewards/margins": 0.18427619338035583, "rewards/rejected": -0.3694200813770294, "step": 4176 }, { "epoch": 11.436002737850787, "grad_norm": 3.004251003265381, "learning_rate": 4.278082191780822e-07, "log_odds_chosen": 4.157602310180664, "log_odds_ratio": -0.1618078500032425, "logits/chosen": 1.202602505683899, "logits/rejected": 1.1909985542297363, "logps/chosen": -1.4875743389129639, "logps/rejected": -5.381138324737549, "loss": 0.5268, "nll_loss": 0.5106629729270935, "rewards/accuracies": 0.875, "rewards/chosen": -0.1487574428319931, "rewards/margins": 0.389356404542923, "rewards/rejected": -0.5381138324737549, "step": 4177 }, { "epoch": 11.43874058863792, "grad_norm": 3.4315314292907715, "learning_rate": 4.2767123287671233e-07, "log_odds_chosen": 2.493393898010254, "log_odds_ratio": -0.1879512369632721, "logits/chosen": 1.021803855895996, "logits/rejected": 1.0382230281829834, "logps/chosen": -1.540858507156372, "logps/rejected": -3.7987706661224365, "loss": 0.5074, "nll_loss": 0.4885702133178711, "rewards/accuracies": 1.0, "rewards/chosen": -0.15408584475517273, "rewards/margins": 0.22579123079776764, "rewards/rejected": -0.37987709045410156, "step": 4178 }, { "epoch": 11.441478439425051, "grad_norm": 3.84983229637146, "learning_rate": 4.2753424657534243e-07, "log_odds_chosen": 2.1107237339019775, "log_odds_ratio": -0.3001698851585388, "logits/chosen": 1.0433886051177979, "logits/rejected": 1.0447108745574951, "logps/chosen": -2.076669931411743, "logps/rejected": -4.03217077255249, "loss": 0.6048, "nll_loss": 0.5747522711753845, "rewards/accuracies": 1.0, "rewards/chosen": -0.2076670080423355, "rewards/margins": 0.1955500990152359, "rewards/rejected": -0.403217077255249, "step": 4179 }, { "epoch": 11.444216290212184, "grad_norm": 4.492799282073975, "learning_rate": 4.273972602739726e-07, "log_odds_chosen": 1.8950139284133911, "log_odds_ratio": -0.24877400696277618, "logits/chosen": 1.1214230060577393, "logits/rejected": 1.045695185661316, "logps/chosen": -1.591381311416626, "logps/rejected": -3.2948522567749023, "loss": 0.5639, "nll_loss": 0.5390511751174927, "rewards/accuracies": 1.0, "rewards/chosen": -0.15913812816143036, "rewards/margins": 0.17034709453582764, "rewards/rejected": -0.3294852375984192, "step": 4180 }, { "epoch": 11.446954140999315, "grad_norm": 3.4515280723571777, "learning_rate": 4.2726027397260274e-07, "log_odds_chosen": 3.258105516433716, "log_odds_ratio": -0.10804884880781174, "logits/chosen": 1.3094751834869385, "logits/rejected": 1.3696327209472656, "logps/chosen": -2.0424001216888428, "logps/rejected": -5.123211860656738, "loss": 0.5478, "nll_loss": 0.5369940996170044, "rewards/accuracies": 1.0, "rewards/chosen": -0.20424000918865204, "rewards/margins": 0.3080812096595764, "rewards/rejected": -0.5123212337493896, "step": 4181 }, { "epoch": 11.449691991786448, "grad_norm": 3.9652395248413086, "learning_rate": 4.2712328767123284e-07, "log_odds_chosen": 2.876969814300537, "log_odds_ratio": -0.17640285193920135, "logits/chosen": 0.8687412738800049, "logits/rejected": 0.8583410978317261, "logps/chosen": -1.9609076976776123, "logps/rejected": -4.676302909851074, "loss": 0.5708, "nll_loss": 0.5531115531921387, "rewards/accuracies": 1.0, "rewards/chosen": -0.19609077274799347, "rewards/margins": 0.27153950929641724, "rewards/rejected": -0.4676303267478943, "step": 4182 }, { "epoch": 11.45242984257358, "grad_norm": 4.311538219451904, "learning_rate": 4.26986301369863e-07, "log_odds_chosen": 1.4327069520950317, "log_odds_ratio": -0.3574010729789734, "logits/chosen": 0.9744265079498291, "logits/rejected": 0.9476837515830994, "logps/chosen": -1.6560158729553223, "logps/rejected": -2.9713692665100098, "loss": 0.6416, "nll_loss": 0.6058727502822876, "rewards/accuracies": 0.875, "rewards/chosen": -0.16560158133506775, "rewards/margins": 0.1315353661775589, "rewards/rejected": -0.29713693261146545, "step": 4183 }, { "epoch": 11.455167693360712, "grad_norm": 3.737997055053711, "learning_rate": 4.2684931506849314e-07, "log_odds_chosen": 3.32737398147583, "log_odds_ratio": -0.188154399394989, "logits/chosen": 1.1273436546325684, "logits/rejected": 1.1967835426330566, "logps/chosen": -1.871954321861267, "logps/rejected": -4.967426776885986, "loss": 0.5027, "nll_loss": 0.48387885093688965, "rewards/accuracies": 0.875, "rewards/chosen": -0.18719543516635895, "rewards/margins": 0.30954721570014954, "rewards/rejected": -0.4967426657676697, "step": 4184 }, { "epoch": 11.457905544147843, "grad_norm": 6.326009750366211, "learning_rate": 4.2671232876712324e-07, "log_odds_chosen": 1.5889606475830078, "log_odds_ratio": -0.45034053921699524, "logits/chosen": 1.0344980955123901, "logits/rejected": 1.0148426294326782, "logps/chosen": -2.02109956741333, "logps/rejected": -3.527587413787842, "loss": 0.6519, "nll_loss": 0.6068315505981445, "rewards/accuracies": 0.875, "rewards/chosen": -0.20210996270179749, "rewards/margins": 0.15064877271652222, "rewards/rejected": -0.3527587652206421, "step": 4185 }, { "epoch": 11.460643394934976, "grad_norm": 6.361895561218262, "learning_rate": 4.265753424657534e-07, "log_odds_chosen": 1.4594407081604004, "log_odds_ratio": -0.7761671543121338, "logits/chosen": 1.09299635887146, "logits/rejected": 1.112146019935608, "logps/chosen": -2.317162036895752, "logps/rejected": -3.571873903274536, "loss": 0.5637, "nll_loss": 0.48607468605041504, "rewards/accuracies": 0.75, "rewards/chosen": -0.23171618580818176, "rewards/margins": 0.12547120451927185, "rewards/rejected": -0.357187420129776, "step": 4186 }, { "epoch": 11.463381245722108, "grad_norm": 3.6381797790527344, "learning_rate": 4.2643835616438354e-07, "log_odds_chosen": 3.3883488178253174, "log_odds_ratio": -0.2362167239189148, "logits/chosen": 1.065949559211731, "logits/rejected": 1.0195497274398804, "logps/chosen": -2.283940315246582, "logps/rejected": -5.524715423583984, "loss": 0.5572, "nll_loss": 0.5335415005683899, "rewards/accuracies": 1.0, "rewards/chosen": -0.2283940613269806, "rewards/margins": 0.3240775167942047, "rewards/rejected": -0.5524715781211853, "step": 4187 }, { "epoch": 11.46611909650924, "grad_norm": 3.305995464324951, "learning_rate": 4.263013698630137e-07, "log_odds_chosen": 2.774479389190674, "log_odds_ratio": -0.2837357819080353, "logits/chosen": 1.0010465383529663, "logits/rejected": 0.9724119901657104, "logps/chosen": -1.6907024383544922, "logps/rejected": -4.2939300537109375, "loss": 0.5963, "nll_loss": 0.5679304599761963, "rewards/accuracies": 0.75, "rewards/chosen": -0.1690702587366104, "rewards/margins": 0.2603227496147156, "rewards/rejected": -0.4293929934501648, "step": 4188 }, { "epoch": 11.468856947296372, "grad_norm": 3.292600631713867, "learning_rate": 4.261643835616438e-07, "log_odds_chosen": 2.0192930698394775, "log_odds_ratio": -0.20329342782497406, "logits/chosen": 0.9793417453765869, "logits/rejected": 0.99095219373703, "logps/chosen": -1.2977228164672852, "logps/rejected": -3.054081439971924, "loss": 0.4853, "nll_loss": 0.4649335443973541, "rewards/accuracies": 1.0, "rewards/chosen": -0.12977227568626404, "rewards/margins": 0.17563588917255402, "rewards/rejected": -0.30540817975997925, "step": 4189 }, { "epoch": 11.471594798083505, "grad_norm": 3.5098888874053955, "learning_rate": 4.26027397260274e-07, "log_odds_chosen": 2.437978506088257, "log_odds_ratio": -0.26395609974861145, "logits/chosen": 1.307113528251648, "logits/rejected": 1.333128571510315, "logps/chosen": -1.9576616287231445, "logps/rejected": -4.238545894622803, "loss": 0.5791, "nll_loss": 0.5527032613754272, "rewards/accuracies": 0.875, "rewards/chosen": -0.1957661509513855, "rewards/margins": 0.22808843851089478, "rewards/rejected": -0.42385461926460266, "step": 4190 }, { "epoch": 11.474332648870636, "grad_norm": 4.575211048126221, "learning_rate": 4.258904109589041e-07, "log_odds_chosen": 2.6296567916870117, "log_odds_ratio": -0.5097322463989258, "logits/chosen": 1.0455684661865234, "logits/rejected": 1.094183325767517, "logps/chosen": -1.8518009185791016, "logps/rejected": -4.28376579284668, "loss": 0.5328, "nll_loss": 0.4818480312824249, "rewards/accuracies": 0.875, "rewards/chosen": -0.18518009781837463, "rewards/margins": 0.2431964874267578, "rewards/rejected": -0.42837655544281006, "step": 4191 }, { "epoch": 11.477070499657769, "grad_norm": 3.808370590209961, "learning_rate": 4.257534246575342e-07, "log_odds_chosen": 2.243720293045044, "log_odds_ratio": -0.2055755853652954, "logits/chosen": 1.1743298768997192, "logits/rejected": 1.2161513566970825, "logps/chosen": -1.9174280166625977, "logps/rejected": -3.929295539855957, "loss": 0.5164, "nll_loss": 0.49588990211486816, "rewards/accuracies": 0.875, "rewards/chosen": -0.19174279272556305, "rewards/margins": 0.20118674635887146, "rewards/rejected": -0.3929295539855957, "step": 4192 }, { "epoch": 11.4798083504449, "grad_norm": 3.602332353591919, "learning_rate": 4.2561643835616435e-07, "log_odds_chosen": 3.9435348510742188, "log_odds_ratio": -0.09549419581890106, "logits/chosen": 0.9148968458175659, "logits/rejected": 0.9704387784004211, "logps/chosen": -1.5666165351867676, "logps/rejected": -5.194703102111816, "loss": 0.6539, "nll_loss": 0.6443841457366943, "rewards/accuracies": 1.0, "rewards/chosen": -0.15666168928146362, "rewards/margins": 0.3628086745738983, "rewards/rejected": -0.5194703340530396, "step": 4193 }, { "epoch": 11.482546201232033, "grad_norm": 3.0900533199310303, "learning_rate": 4.254794520547945e-07, "log_odds_chosen": 3.064441680908203, "log_odds_ratio": -0.2581857144832611, "logits/chosen": 1.4398592710494995, "logits/rejected": 1.4195095300674438, "logps/chosen": -1.6250779628753662, "logps/rejected": -4.504881858825684, "loss": 0.5326, "nll_loss": 0.5068103075027466, "rewards/accuracies": 0.875, "rewards/chosen": -0.1625078022480011, "rewards/margins": 0.2879803776741028, "rewards/rejected": -0.4504881799221039, "step": 4194 }, { "epoch": 11.485284052019164, "grad_norm": 3.971510410308838, "learning_rate": 4.2534246575342465e-07, "log_odds_chosen": 2.4475460052490234, "log_odds_ratio": -0.2132207751274109, "logits/chosen": 0.9484280347824097, "logits/rejected": 1.0149900913238525, "logps/chosen": -2.221951961517334, "logps/rejected": -4.568920135498047, "loss": 0.6987, "nll_loss": 0.677331805229187, "rewards/accuracies": 1.0, "rewards/chosen": -0.22219520807266235, "rewards/margins": 0.23469685018062592, "rewards/rejected": -0.4568920135498047, "step": 4195 }, { "epoch": 11.488021902806297, "grad_norm": 4.011726379394531, "learning_rate": 4.2520547945205475e-07, "log_odds_chosen": 1.9021674394607544, "log_odds_ratio": -0.2888796627521515, "logits/chosen": 1.090665340423584, "logits/rejected": 1.113830804824829, "logps/chosen": -1.933877944946289, "logps/rejected": -3.6574692726135254, "loss": 0.5751, "nll_loss": 0.546208381652832, "rewards/accuracies": 0.875, "rewards/chosen": -0.19338779151439667, "rewards/margins": 0.1723591387271881, "rewards/rejected": -0.3657469153404236, "step": 4196 }, { "epoch": 11.49075975359343, "grad_norm": 5.462583065032959, "learning_rate": 4.2506849315068496e-07, "log_odds_chosen": 2.4550857543945312, "log_odds_ratio": -0.21103955805301666, "logits/chosen": 0.7138150334358215, "logits/rejected": 0.6268541812896729, "logps/chosen": -1.8848928213119507, "logps/rejected": -4.179510593414307, "loss": 0.6357, "nll_loss": 0.6146179437637329, "rewards/accuracies": 1.0, "rewards/chosen": -0.18848928809165955, "rewards/margins": 0.22946175932884216, "rewards/rejected": -0.4179510474205017, "step": 4197 }, { "epoch": 11.493497604380561, "grad_norm": 3.186288595199585, "learning_rate": 4.2493150684931506e-07, "log_odds_chosen": 3.6680731773376465, "log_odds_ratio": -0.1541871577501297, "logits/chosen": 1.1954981088638306, "logits/rejected": 1.2526202201843262, "logps/chosen": -1.5127311944961548, "logps/rejected": -4.929393291473389, "loss": 0.6154, "nll_loss": 0.5999881625175476, "rewards/accuracies": 1.0, "rewards/chosen": -0.15127311646938324, "rewards/margins": 0.34166622161865234, "rewards/rejected": -0.4929393529891968, "step": 4198 }, { "epoch": 11.496235455167694, "grad_norm": 4.115192413330078, "learning_rate": 4.2479452054794516e-07, "log_odds_chosen": 3.016136646270752, "log_odds_ratio": -0.12045499682426453, "logits/chosen": 1.4561817646026611, "logits/rejected": 1.426845669746399, "logps/chosen": -1.6046347618103027, "logps/rejected": -4.354789733886719, "loss": 0.489, "nll_loss": 0.47698545455932617, "rewards/accuracies": 1.0, "rewards/chosen": -0.16046348214149475, "rewards/margins": 0.27501553297042847, "rewards/rejected": -0.43547898530960083, "step": 4199 }, { "epoch": 11.498973305954825, "grad_norm": 3.419208526611328, "learning_rate": 4.246575342465753e-07, "log_odds_chosen": 2.8281428813934326, "log_odds_ratio": -0.15854017436504364, "logits/chosen": 1.0633450746536255, "logits/rejected": 0.994281530380249, "logps/chosen": -1.6346241235733032, "logps/rejected": -4.218921184539795, "loss": 0.5585, "nll_loss": 0.5426547527313232, "rewards/accuracies": 1.0, "rewards/chosen": -0.16346241533756256, "rewards/margins": 0.25842970609664917, "rewards/rejected": -0.4218921363353729, "step": 4200 }, { "epoch": 11.501711156741958, "grad_norm": 4.253641128540039, "learning_rate": 4.2452054794520546e-07, "log_odds_chosen": 2.5187454223632812, "log_odds_ratio": -0.19214580953121185, "logits/chosen": 0.9525936841964722, "logits/rejected": 0.8485991954803467, "logps/chosen": -1.4127336740493774, "logps/rejected": -3.7129955291748047, "loss": 0.5415, "nll_loss": 0.5223339796066284, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412733793258667, "rewards/margins": 0.23002617061138153, "rewards/rejected": -0.3712995648384094, "step": 4201 }, { "epoch": 11.50444900752909, "grad_norm": 3.894305467605591, "learning_rate": 4.243835616438356e-07, "log_odds_chosen": 3.762643337249756, "log_odds_ratio": -0.18413925170898438, "logits/chosen": 0.9821004271507263, "logits/rejected": 0.9662326574325562, "logps/chosen": -1.9372270107269287, "logps/rejected": -5.520828723907471, "loss": 0.6099, "nll_loss": 0.5914416313171387, "rewards/accuracies": 1.0, "rewards/chosen": -0.1937226951122284, "rewards/margins": 0.3583602011203766, "rewards/rejected": -0.552082896232605, "step": 4202 }, { "epoch": 11.507186858316222, "grad_norm": 3.5360822677612305, "learning_rate": 4.242465753424657e-07, "log_odds_chosen": 2.879617691040039, "log_odds_ratio": -0.20980076491832733, "logits/chosen": 0.9344075918197632, "logits/rejected": 0.9109359979629517, "logps/chosen": -1.9486232995986938, "logps/rejected": -4.649284362792969, "loss": 0.6754, "nll_loss": 0.6544079780578613, "rewards/accuracies": 0.875, "rewards/chosen": -0.19486233592033386, "rewards/margins": 0.27006614208221436, "rewards/rejected": -0.4649284780025482, "step": 4203 }, { "epoch": 11.509924709103354, "grad_norm": 3.161133289337158, "learning_rate": 4.241095890410959e-07, "log_odds_chosen": 3.8649120330810547, "log_odds_ratio": -0.16352717578411102, "logits/chosen": 1.2989565134048462, "logits/rejected": 1.2829254865646362, "logps/chosen": -2.2124361991882324, "logps/rejected": -5.941871643066406, "loss": 0.6099, "nll_loss": 0.5935153365135193, "rewards/accuracies": 1.0, "rewards/chosen": -0.22124361991882324, "rewards/margins": 0.3729435205459595, "rewards/rejected": -0.5941871404647827, "step": 4204 }, { "epoch": 11.512662559890487, "grad_norm": 3.8510098457336426, "learning_rate": 4.23972602739726e-07, "log_odds_chosen": 1.7626392841339111, "log_odds_ratio": -0.3574024736881256, "logits/chosen": 0.8994946479797363, "logits/rejected": 0.8427821397781372, "logps/chosen": -1.7105729579925537, "logps/rejected": -3.3400654792785645, "loss": 0.5654, "nll_loss": 0.5296280980110168, "rewards/accuracies": 0.75, "rewards/chosen": -0.1710573136806488, "rewards/margins": 0.16294926404953003, "rewards/rejected": -0.33400657773017883, "step": 4205 }, { "epoch": 11.515400410677618, "grad_norm": 3.4637975692749023, "learning_rate": 4.238356164383561e-07, "log_odds_chosen": 3.219813108444214, "log_odds_ratio": -0.24185322225093842, "logits/chosen": 1.163655400276184, "logits/rejected": 1.1671786308288574, "logps/chosen": -1.320261001586914, "logps/rejected": -4.313231945037842, "loss": 0.4745, "nll_loss": 0.45033663511276245, "rewards/accuracies": 1.0, "rewards/chosen": -0.1320260912179947, "rewards/margins": 0.29929712414741516, "rewards/rejected": -0.43132320046424866, "step": 4206 }, { "epoch": 11.51813826146475, "grad_norm": 3.98305344581604, "learning_rate": 4.2369863013698627e-07, "log_odds_chosen": 2.1349611282348633, "log_odds_ratio": -0.2627860903739929, "logits/chosen": 1.0955466032028198, "logits/rejected": 1.0794570446014404, "logps/chosen": -2.2070226669311523, "logps/rejected": -4.218092918395996, "loss": 0.514, "nll_loss": 0.48771047592163086, "rewards/accuracies": 0.875, "rewards/chosen": -0.22070224583148956, "rewards/margins": 0.20110705494880676, "rewards/rejected": -0.4218093156814575, "step": 4207 }, { "epoch": 11.520876112251882, "grad_norm": 8.988574981689453, "learning_rate": 4.235616438356164e-07, "log_odds_chosen": 1.6719454526901245, "log_odds_ratio": -0.5078319311141968, "logits/chosen": 1.0083510875701904, "logits/rejected": 1.0151225328445435, "logps/chosen": -2.4838597774505615, "logps/rejected": -4.041477680206299, "loss": 0.6065, "nll_loss": 0.5557333827018738, "rewards/accuracies": 0.625, "rewards/chosen": -0.2483859807252884, "rewards/margins": 0.15576180815696716, "rewards/rejected": -0.40414777398109436, "step": 4208 }, { "epoch": 11.523613963039015, "grad_norm": 4.1308088302612305, "learning_rate": 4.2342465753424657e-07, "log_odds_chosen": 1.145963430404663, "log_odds_ratio": -0.31853193044662476, "logits/chosen": 1.142418384552002, "logits/rejected": 1.1105271577835083, "logps/chosen": -1.265669584274292, "logps/rejected": -2.206895351409912, "loss": 0.5087, "nll_loss": 0.4768630564212799, "rewards/accuracies": 1.0, "rewards/chosen": -0.12656696140766144, "rewards/margins": 0.09412257373332977, "rewards/rejected": -0.2206895500421524, "step": 4209 }, { "epoch": 11.526351813826146, "grad_norm": 3.600282669067383, "learning_rate": 4.2328767123287667e-07, "log_odds_chosen": 1.7345318794250488, "log_odds_ratio": -0.28881874680519104, "logits/chosen": 0.8298501968383789, "logits/rejected": 0.8342452049255371, "logps/chosen": -1.8808739185333252, "logps/rejected": -3.4906985759735107, "loss": 0.5827, "nll_loss": 0.553846538066864, "rewards/accuracies": 0.875, "rewards/chosen": -0.18808738887310028, "rewards/margins": 0.16098247468471527, "rewards/rejected": -0.34906983375549316, "step": 4210 }, { "epoch": 11.529089664613279, "grad_norm": 3.5516209602355957, "learning_rate": 4.231506849315069e-07, "log_odds_chosen": 1.7447260618209839, "log_odds_ratio": -0.2613567113876343, "logits/chosen": 0.8997853994369507, "logits/rejected": 0.8808712959289551, "logps/chosen": -1.4863508939743042, "logps/rejected": -2.937959671020508, "loss": 0.4623, "nll_loss": 0.4362046718597412, "rewards/accuracies": 0.875, "rewards/chosen": -0.14863508939743042, "rewards/margins": 0.14516091346740723, "rewards/rejected": -0.29379600286483765, "step": 4211 }, { "epoch": 11.53182751540041, "grad_norm": 5.195977687835693, "learning_rate": 4.23013698630137e-07, "log_odds_chosen": 2.676588296890259, "log_odds_ratio": -0.19894500076770782, "logits/chosen": 0.9760147333145142, "logits/rejected": 1.019313097000122, "logps/chosen": -1.8927196264266968, "logps/rejected": -4.386441230773926, "loss": 0.575, "nll_loss": 0.555094301700592, "rewards/accuracies": 1.0, "rewards/chosen": -0.1892719566822052, "rewards/margins": 0.24937213957309723, "rewards/rejected": -0.4386441111564636, "step": 4212 }, { "epoch": 11.534565366187543, "grad_norm": 7.4334397315979, "learning_rate": 4.2287671232876707e-07, "log_odds_chosen": 2.8493432998657227, "log_odds_ratio": -0.5887704491615295, "logits/chosen": 1.2340842485427856, "logits/rejected": 1.1984504461288452, "logps/chosen": -2.3409762382507324, "logps/rejected": -5.065777778625488, "loss": 0.6536, "nll_loss": 0.5946757793426514, "rewards/accuracies": 0.875, "rewards/chosen": -0.23409762978553772, "rewards/margins": 0.27248015999794006, "rewards/rejected": -0.5065777897834778, "step": 4213 }, { "epoch": 11.537303216974674, "grad_norm": 3.296410083770752, "learning_rate": 4.227397260273972e-07, "log_odds_chosen": 2.979459762573242, "log_odds_ratio": -0.09791762381792068, "logits/chosen": 1.0907518863677979, "logits/rejected": 1.0940446853637695, "logps/chosen": -2.351841449737549, "logps/rejected": -5.196117401123047, "loss": 0.6182, "nll_loss": 0.6084216833114624, "rewards/accuracies": 1.0, "rewards/chosen": -0.2351841777563095, "rewards/margins": 0.28442758321762085, "rewards/rejected": -0.5196117162704468, "step": 4214 }, { "epoch": 11.540041067761807, "grad_norm": 3.710261106491089, "learning_rate": 4.226027397260274e-07, "log_odds_chosen": 2.605841875076294, "log_odds_ratio": -0.35832297801971436, "logits/chosen": 1.1079448461532593, "logits/rejected": 1.0666323900222778, "logps/chosen": -2.044808864593506, "logps/rejected": -4.519200801849365, "loss": 0.5393, "nll_loss": 0.5034357309341431, "rewards/accuracies": 0.875, "rewards/chosen": -0.20448090136051178, "rewards/margins": 0.2474391609430313, "rewards/rejected": -0.4519200623035431, "step": 4215 }, { "epoch": 11.542778918548938, "grad_norm": 3.4996602535247803, "learning_rate": 4.2246575342465753e-07, "log_odds_chosen": 2.2531280517578125, "log_odds_ratio": -0.39978092908859253, "logits/chosen": 1.140001893043518, "logits/rejected": 1.1575932502746582, "logps/chosen": -1.8862967491149902, "logps/rejected": -3.9940197467803955, "loss": 0.625, "nll_loss": 0.5850694179534912, "rewards/accuracies": 0.875, "rewards/chosen": -0.18862967193126678, "rewards/margins": 0.210772305727005, "rewards/rejected": -0.3994019627571106, "step": 4216 }, { "epoch": 11.545516769336071, "grad_norm": 3.6673808097839355, "learning_rate": 4.2232876712328763e-07, "log_odds_chosen": 2.3101553916931152, "log_odds_ratio": -0.16517925262451172, "logits/chosen": 1.0748646259307861, "logits/rejected": 1.056484580039978, "logps/chosen": -1.4889839887619019, "logps/rejected": -3.584415912628174, "loss": 0.5293, "nll_loss": 0.5128288865089417, "rewards/accuracies": 1.0, "rewards/chosen": -0.1488983929157257, "rewards/margins": 0.20954318344593048, "rewards/rejected": -0.3584415912628174, "step": 4217 }, { "epoch": 11.548254620123203, "grad_norm": 2.9882047176361084, "learning_rate": 4.2219178082191783e-07, "log_odds_chosen": 5.049698352813721, "log_odds_ratio": -0.11176657676696777, "logits/chosen": 1.0906765460968018, "logits/rejected": 1.0701277256011963, "logps/chosen": -1.764966368675232, "logps/rejected": -6.637377738952637, "loss": 0.5701, "nll_loss": 0.5588759183883667, "rewards/accuracies": 1.0, "rewards/chosen": -0.17649662494659424, "rewards/margins": 0.48724114894866943, "rewards/rejected": -0.6637377738952637, "step": 4218 }, { "epoch": 11.550992470910336, "grad_norm": 3.546285629272461, "learning_rate": 4.2205479452054793e-07, "log_odds_chosen": 1.8411046266555786, "log_odds_ratio": -0.28571802377700806, "logits/chosen": 0.9758041501045227, "logits/rejected": 1.0026822090148926, "logps/chosen": -1.705350637435913, "logps/rejected": -3.3793411254882812, "loss": 0.6097, "nll_loss": 0.5811503529548645, "rewards/accuracies": 1.0, "rewards/chosen": -0.17053507268428802, "rewards/margins": 0.16739904880523682, "rewards/rejected": -0.33793413639068604, "step": 4219 }, { "epoch": 11.553730321697467, "grad_norm": 3.650022029876709, "learning_rate": 4.2191780821917803e-07, "log_odds_chosen": 3.9114701747894287, "log_odds_ratio": -0.10109929740428925, "logits/chosen": 0.800834059715271, "logits/rejected": 0.8018417358398438, "logps/chosen": -2.17161226272583, "logps/rejected": -5.918781280517578, "loss": 0.6016, "nll_loss": 0.591464638710022, "rewards/accuracies": 1.0, "rewards/chosen": -0.21716122329235077, "rewards/margins": 0.3747168779373169, "rewards/rejected": -0.5918781161308289, "step": 4220 }, { "epoch": 11.5564681724846, "grad_norm": 3.430032730102539, "learning_rate": 4.2178082191780824e-07, "log_odds_chosen": 2.625501871109009, "log_odds_ratio": -0.2501375079154968, "logits/chosen": 0.9669164419174194, "logits/rejected": 0.9760029911994934, "logps/chosen": -1.5545462369918823, "logps/rejected": -3.9926862716674805, "loss": 0.5462, "nll_loss": 0.521194338798523, "rewards/accuracies": 1.0, "rewards/chosen": -0.1554546356201172, "rewards/margins": 0.24381402134895325, "rewards/rejected": -0.39926862716674805, "step": 4221 }, { "epoch": 11.55920602327173, "grad_norm": 4.863299369812012, "learning_rate": 4.2164383561643834e-07, "log_odds_chosen": 2.1862432956695557, "log_odds_ratio": -0.33400964736938477, "logits/chosen": 0.9559639096260071, "logits/rejected": 0.802899956703186, "logps/chosen": -1.7892956733703613, "logps/rejected": -3.778083324432373, "loss": 0.5696, "nll_loss": 0.5361940860748291, "rewards/accuracies": 0.875, "rewards/chosen": -0.17892956733703613, "rewards/margins": 0.19887876510620117, "rewards/rejected": -0.3778083324432373, "step": 4222 }, { "epoch": 11.561943874058864, "grad_norm": 3.5227644443511963, "learning_rate": 4.215068493150685e-07, "log_odds_chosen": 1.9051513671875, "log_odds_ratio": -0.18970030546188354, "logits/chosen": 0.9596083164215088, "logits/rejected": 0.7678115367889404, "logps/chosen": -1.2532272338867188, "logps/rejected": -2.8672361373901367, "loss": 0.5026, "nll_loss": 0.48365941643714905, "rewards/accuracies": 1.0, "rewards/chosen": -0.12532272934913635, "rewards/margins": 0.1614009141921997, "rewards/rejected": -0.28672364354133606, "step": 4223 }, { "epoch": 11.564681724845997, "grad_norm": 3.65805983543396, "learning_rate": 4.213698630136986e-07, "log_odds_chosen": 1.8407219648361206, "log_odds_ratio": -0.20393098890781403, "logits/chosen": 0.9136763215065002, "logits/rejected": 0.9308100342750549, "logps/chosen": -1.5384055376052856, "logps/rejected": -3.180938482284546, "loss": 0.5094, "nll_loss": 0.4890323579311371, "rewards/accuracies": 1.0, "rewards/chosen": -0.1538405418395996, "rewards/margins": 0.16425330936908722, "rewards/rejected": -0.318093866109848, "step": 4224 }, { "epoch": 11.567419575633128, "grad_norm": 3.5807268619537354, "learning_rate": 4.212328767123288e-07, "log_odds_chosen": 2.6773152351379395, "log_odds_ratio": -0.2351449728012085, "logits/chosen": 0.9507948756217957, "logits/rejected": 0.9707951545715332, "logps/chosen": -1.5278247594833374, "logps/rejected": -4.011307716369629, "loss": 0.6547, "nll_loss": 0.6311591267585754, "rewards/accuracies": 1.0, "rewards/chosen": -0.15278248488903046, "rewards/margins": 0.24834828078746796, "rewards/rejected": -0.401130735874176, "step": 4225 }, { "epoch": 11.570157426420261, "grad_norm": 3.533928155899048, "learning_rate": 4.210958904109589e-07, "log_odds_chosen": 3.6676881313323975, "log_odds_ratio": -0.2929617166519165, "logits/chosen": 1.0662519931793213, "logits/rejected": 1.056624174118042, "logps/chosen": -1.8333609104156494, "logps/rejected": -5.356054782867432, "loss": 0.5591, "nll_loss": 0.5298420786857605, "rewards/accuracies": 0.875, "rewards/chosen": -0.18333610892295837, "rewards/margins": 0.35226941108703613, "rewards/rejected": -0.5356054902076721, "step": 4226 }, { "epoch": 11.572895277207392, "grad_norm": 3.702934741973877, "learning_rate": 4.20958904109589e-07, "log_odds_chosen": 3.2194132804870605, "log_odds_ratio": -0.11659224331378937, "logits/chosen": 1.0770070552825928, "logits/rejected": 1.138073444366455, "logps/chosen": -1.4463181495666504, "logps/rejected": -4.098967552185059, "loss": 0.512, "nll_loss": 0.5003146529197693, "rewards/accuracies": 1.0, "rewards/chosen": -0.14463181793689728, "rewards/margins": 0.26526492834091187, "rewards/rejected": -0.40989676117897034, "step": 4227 }, { "epoch": 11.575633127994525, "grad_norm": 3.6838996410369873, "learning_rate": 4.208219178082192e-07, "log_odds_chosen": 1.9055993556976318, "log_odds_ratio": -0.3255504071712494, "logits/chosen": 1.0342575311660767, "logits/rejected": 1.0562855005264282, "logps/chosen": -1.943274736404419, "logps/rejected": -3.6824779510498047, "loss": 0.6352, "nll_loss": 0.6026843786239624, "rewards/accuracies": 0.75, "rewards/chosen": -0.1943274736404419, "rewards/margins": 0.17392030358314514, "rewards/rejected": -0.36824777722358704, "step": 4228 }, { "epoch": 11.578370978781656, "grad_norm": 3.6117286682128906, "learning_rate": 4.206849315068493e-07, "log_odds_chosen": 1.920231819152832, "log_odds_ratio": -0.3050902187824249, "logits/chosen": 1.0715802907943726, "logits/rejected": 1.095718502998352, "logps/chosen": -1.6908513307571411, "logps/rejected": -3.4627304077148438, "loss": 0.5947, "nll_loss": 0.5641824007034302, "rewards/accuracies": 0.875, "rewards/chosen": -0.16908514499664307, "rewards/margins": 0.17718788981437683, "rewards/rejected": -0.3462730348110199, "step": 4229 }, { "epoch": 11.58110882956879, "grad_norm": 3.3661301136016846, "learning_rate": 4.2054794520547945e-07, "log_odds_chosen": 3.1201586723327637, "log_odds_ratio": -0.24912425875663757, "logits/chosen": 0.8883828520774841, "logits/rejected": 0.873569130897522, "logps/chosen": -1.996779441833496, "logps/rejected": -4.948753356933594, "loss": 0.6524, "nll_loss": 0.6274649500846863, "rewards/accuracies": 0.875, "rewards/chosen": -0.1996779441833496, "rewards/margins": 0.29519736766815186, "rewards/rejected": -0.49487531185150146, "step": 4230 }, { "epoch": 11.58384668035592, "grad_norm": 3.709573745727539, "learning_rate": 4.2041095890410955e-07, "log_odds_chosen": 2.6648995876312256, "log_odds_ratio": -0.296772301197052, "logits/chosen": 0.8912246227264404, "logits/rejected": 0.8518939018249512, "logps/chosen": -1.8751134872436523, "logps/rejected": -4.390183448791504, "loss": 0.5749, "nll_loss": 0.5452020168304443, "rewards/accuracies": 0.875, "rewards/chosen": -0.1875113546848297, "rewards/margins": 0.2515070140361786, "rewards/rejected": -0.4390183687210083, "step": 4231 }, { "epoch": 11.586584531143053, "grad_norm": 3.0919735431671143, "learning_rate": 4.202739726027397e-07, "log_odds_chosen": 2.557809591293335, "log_odds_ratio": -0.1156817302107811, "logits/chosen": 0.8603357076644897, "logits/rejected": 0.9150944948196411, "logps/chosen": -1.45723557472229, "logps/rejected": -3.651287078857422, "loss": 0.5478, "nll_loss": 0.5362815260887146, "rewards/accuracies": 1.0, "rewards/chosen": -0.14572355151176453, "rewards/margins": 0.2194051444530487, "rewards/rejected": -0.36512869596481323, "step": 4232 }, { "epoch": 11.589322381930184, "grad_norm": 3.91230845451355, "learning_rate": 4.2013698630136985e-07, "log_odds_chosen": 2.8094561100006104, "log_odds_ratio": -0.2965765595436096, "logits/chosen": 0.7304610013961792, "logits/rejected": 0.7187725901603699, "logps/chosen": -2.0659165382385254, "logps/rejected": -4.703281402587891, "loss": 0.5959, "nll_loss": 0.5662234425544739, "rewards/accuracies": 0.875, "rewards/chosen": -0.2065916657447815, "rewards/margins": 0.2637365162372589, "rewards/rejected": -0.4703282117843628, "step": 4233 }, { "epoch": 11.592060232717317, "grad_norm": 3.7921812534332275, "learning_rate": 4.1999999999999995e-07, "log_odds_chosen": 2.5559849739074707, "log_odds_ratio": -0.2828628420829773, "logits/chosen": 0.7943556904792786, "logits/rejected": 0.7487561106681824, "logps/chosen": -1.7567484378814697, "logps/rejected": -4.12098503112793, "loss": 0.5765, "nll_loss": 0.5481657385826111, "rewards/accuracies": 1.0, "rewards/chosen": -0.17567485570907593, "rewards/margins": 0.23642370104789734, "rewards/rejected": -0.4120985269546509, "step": 4234 }, { "epoch": 11.594798083504449, "grad_norm": 7.105854511260986, "learning_rate": 4.1986301369863015e-07, "log_odds_chosen": 1.976269245147705, "log_odds_ratio": -0.25520065426826477, "logits/chosen": 1.2635722160339355, "logits/rejected": 1.1824872493743896, "logps/chosen": -1.913428783416748, "logps/rejected": -3.6858174800872803, "loss": 0.5551, "nll_loss": 0.5296075940132141, "rewards/accuracies": 1.0, "rewards/chosen": -0.19134286046028137, "rewards/margins": 0.17723888158798218, "rewards/rejected": -0.36858177185058594, "step": 4235 }, { "epoch": 11.597535934291582, "grad_norm": 3.4203295707702637, "learning_rate": 4.1972602739726025e-07, "log_odds_chosen": 3.761392831802368, "log_odds_ratio": -0.0664568617939949, "logits/chosen": 0.9509826898574829, "logits/rejected": 0.9410766363143921, "logps/chosen": -2.0445003509521484, "logps/rejected": -5.6269683837890625, "loss": 0.5733, "nll_loss": 0.5666992664337158, "rewards/accuracies": 1.0, "rewards/chosen": -0.20445004105567932, "rewards/margins": 0.3582468628883362, "rewards/rejected": -0.5626969337463379, "step": 4236 }, { "epoch": 11.600273785078713, "grad_norm": 3.467928409576416, "learning_rate": 4.195890410958904e-07, "log_odds_chosen": 3.2831039428710938, "log_odds_ratio": -0.11329007148742676, "logits/chosen": 0.9315582513809204, "logits/rejected": 0.9480204582214355, "logps/chosen": -1.4924638271331787, "logps/rejected": -4.475404739379883, "loss": 0.4989, "nll_loss": 0.4875640869140625, "rewards/accuracies": 1.0, "rewards/chosen": -0.14924637973308563, "rewards/margins": 0.2982941269874573, "rewards/rejected": -0.4475405216217041, "step": 4237 }, { "epoch": 11.603011635865846, "grad_norm": 5.397265434265137, "learning_rate": 4.194520547945205e-07, "log_odds_chosen": 2.9148130416870117, "log_odds_ratio": -0.2254362851381302, "logits/chosen": 1.2224746942520142, "logits/rejected": 1.2355737686157227, "logps/chosen": -2.8581557273864746, "logps/rejected": -5.704097747802734, "loss": 0.7794, "nll_loss": 0.7568275332450867, "rewards/accuracies": 1.0, "rewards/chosen": -0.28581559658050537, "rewards/margins": 0.28459420800209045, "rewards/rejected": -0.5704097747802734, "step": 4238 }, { "epoch": 11.605749486652977, "grad_norm": 4.403668403625488, "learning_rate": 4.1931506849315066e-07, "log_odds_chosen": 1.7126736640930176, "log_odds_ratio": -0.30641162395477295, "logits/chosen": 0.9887614250183105, "logits/rejected": 1.0127946138381958, "logps/chosen": -2.3068902492523193, "logps/rejected": -3.8784539699554443, "loss": 0.6312, "nll_loss": 0.6005606651306152, "rewards/accuracies": 1.0, "rewards/chosen": -0.23068901896476746, "rewards/margins": 0.1571563482284546, "rewards/rejected": -0.38784536719322205, "step": 4239 }, { "epoch": 11.60848733744011, "grad_norm": 8.246984481811523, "learning_rate": 4.191780821917808e-07, "log_odds_chosen": 4.203424453735352, "log_odds_ratio": -0.1503923386335373, "logits/chosen": 1.046513319015503, "logits/rejected": 1.039522409439087, "logps/chosen": -1.6687105894088745, "logps/rejected": -5.627671241760254, "loss": 0.6307, "nll_loss": 0.6156406402587891, "rewards/accuracies": 1.0, "rewards/chosen": -0.1668710559606552, "rewards/margins": 0.3958961069583893, "rewards/rejected": -0.5627671480178833, "step": 4240 }, { "epoch": 11.611225188227241, "grad_norm": 6.857512474060059, "learning_rate": 4.190410958904109e-07, "log_odds_chosen": 2.9583475589752197, "log_odds_ratio": -0.34999769926071167, "logits/chosen": 1.0290478467941284, "logits/rejected": 1.0361782312393188, "logps/chosen": -2.1368842124938965, "logps/rejected": -4.990668296813965, "loss": 0.6029, "nll_loss": 0.5679444074630737, "rewards/accuracies": 0.875, "rewards/chosen": -0.21368840336799622, "rewards/margins": 0.28537845611572266, "rewards/rejected": -0.49906688928604126, "step": 4241 }, { "epoch": 11.613963039014374, "grad_norm": 4.501880645751953, "learning_rate": 4.189041095890411e-07, "log_odds_chosen": 3.1887354850769043, "log_odds_ratio": -0.2307915836572647, "logits/chosen": 0.9454245567321777, "logits/rejected": 0.8642065525054932, "logps/chosen": -1.6279075145721436, "logps/rejected": -4.6411542892456055, "loss": 0.7081, "nll_loss": 0.6850349307060242, "rewards/accuracies": 0.875, "rewards/chosen": -0.16279076039791107, "rewards/margins": 0.3013246953487396, "rewards/rejected": -0.4641154706478119, "step": 4242 }, { "epoch": 11.616700889801505, "grad_norm": 3.7216665744781494, "learning_rate": 4.187671232876712e-07, "log_odds_chosen": 2.625819444656372, "log_odds_ratio": -0.14469529688358307, "logits/chosen": 1.1320793628692627, "logits/rejected": 1.1607035398483276, "logps/chosen": -1.8838121891021729, "logps/rejected": -4.208072185516357, "loss": 0.5451, "nll_loss": 0.5306730270385742, "rewards/accuracies": 1.0, "rewards/chosen": -0.18838120996952057, "rewards/margins": 0.23242604732513428, "rewards/rejected": -0.42080724239349365, "step": 4243 }, { "epoch": 11.619438740588638, "grad_norm": 3.6288015842437744, "learning_rate": 4.1863013698630136e-07, "log_odds_chosen": 1.8272926807403564, "log_odds_ratio": -0.2526707947254181, "logits/chosen": 1.153734803199768, "logits/rejected": 1.190452218055725, "logps/chosen": -1.5429904460906982, "logps/rejected": -3.197981595993042, "loss": 0.4813, "nll_loss": 0.45599469542503357, "rewards/accuracies": 1.0, "rewards/chosen": -0.15429902076721191, "rewards/margins": 0.16549912095069885, "rewards/rejected": -0.31979817152023315, "step": 4244 }, { "epoch": 11.62217659137577, "grad_norm": 3.8452205657958984, "learning_rate": 4.1849315068493146e-07, "log_odds_chosen": 1.276827096939087, "log_odds_ratio": -0.30853188037872314, "logits/chosen": 1.0657927989959717, "logits/rejected": 0.9926429986953735, "logps/chosen": -1.5422825813293457, "logps/rejected": -2.6650876998901367, "loss": 0.5243, "nll_loss": 0.4934162497520447, "rewards/accuracies": 1.0, "rewards/chosen": -0.15422827005386353, "rewards/margins": 0.11228051781654358, "rewards/rejected": -0.2665087878704071, "step": 4245 }, { "epoch": 11.624914442162902, "grad_norm": 3.7466037273406982, "learning_rate": 4.183561643835616e-07, "log_odds_chosen": 3.0904698371887207, "log_odds_ratio": -0.20854926109313965, "logits/chosen": 1.4071223735809326, "logits/rejected": 1.4393260478973389, "logps/chosen": -2.4989399909973145, "logps/rejected": -5.4933037757873535, "loss": 0.5853, "nll_loss": 0.5644775629043579, "rewards/accuracies": 0.875, "rewards/chosen": -0.24989399313926697, "rewards/margins": 0.29943639039993286, "rewards/rejected": -0.5493304133415222, "step": 4246 }, { "epoch": 11.627652292950033, "grad_norm": 3.5293538570404053, "learning_rate": 4.1821917808219177e-07, "log_odds_chosen": 1.4633848667144775, "log_odds_ratio": -0.3212755024433136, "logits/chosen": 1.0963919162750244, "logits/rejected": 0.9887837171554565, "logps/chosen": -1.517441987991333, "logps/rejected": -2.8342666625976562, "loss": 0.5387, "nll_loss": 0.5065540075302124, "rewards/accuracies": 0.875, "rewards/chosen": -0.15174420177936554, "rewards/margins": 0.13168247044086456, "rewards/rejected": -0.2834267020225525, "step": 4247 }, { "epoch": 11.630390143737166, "grad_norm": 3.611504077911377, "learning_rate": 4.1808219178082187e-07, "log_odds_chosen": 3.769397735595703, "log_odds_ratio": -0.1992575228214264, "logits/chosen": 0.7411766052246094, "logits/rejected": 0.7086071968078613, "logps/chosen": -1.3615254163742065, "logps/rejected": -4.866011142730713, "loss": 0.5865, "nll_loss": 0.5665435194969177, "rewards/accuracies": 1.0, "rewards/chosen": -0.13615255057811737, "rewards/margins": 0.3504485487937927, "rewards/rejected": -0.4866011142730713, "step": 4248 }, { "epoch": 11.633127994524298, "grad_norm": 3.6079254150390625, "learning_rate": 4.1794520547945207e-07, "log_odds_chosen": 2.3209686279296875, "log_odds_ratio": -0.20248526334762573, "logits/chosen": 0.9510961174964905, "logits/rejected": 0.9694782495498657, "logps/chosen": -1.6656885147094727, "logps/rejected": -3.6950533390045166, "loss": 0.5732, "nll_loss": 0.5529313087463379, "rewards/accuracies": 1.0, "rewards/chosen": -0.1665688455104828, "rewards/margins": 0.20293647050857544, "rewards/rejected": -0.3695053458213806, "step": 4249 }, { "epoch": 11.63586584531143, "grad_norm": 3.8315727710723877, "learning_rate": 4.1780821917808217e-07, "log_odds_chosen": 2.6204135417938232, "log_odds_ratio": -0.3109380602836609, "logits/chosen": 1.1584633588790894, "logits/rejected": 1.0652259588241577, "logps/chosen": -1.4674303531646729, "logps/rejected": -3.8808512687683105, "loss": 0.5092, "nll_loss": 0.47808215022087097, "rewards/accuracies": 0.875, "rewards/chosen": -0.146743044257164, "rewards/margins": 0.24134206771850586, "rewards/rejected": -0.38808512687683105, "step": 4250 }, { "epoch": 11.638603696098563, "grad_norm": 4.233395576477051, "learning_rate": 4.176712328767123e-07, "log_odds_chosen": 2.5631978511810303, "log_odds_ratio": -0.29003942012786865, "logits/chosen": 1.0904619693756104, "logits/rejected": 1.0823487043380737, "logps/chosen": -2.8195149898529053, "logps/rejected": -5.148038387298584, "loss": 0.6912, "nll_loss": 0.6622162461280823, "rewards/accuracies": 0.875, "rewards/chosen": -0.2819514870643616, "rewards/margins": 0.2328522950410843, "rewards/rejected": -0.5148037672042847, "step": 4251 }, { "epoch": 11.641341546885695, "grad_norm": 3.765291929244995, "learning_rate": 4.175342465753424e-07, "log_odds_chosen": 4.432590484619141, "log_odds_ratio": -0.09303230792284012, "logits/chosen": 1.1594945192337036, "logits/rejected": 1.118992805480957, "logps/chosen": -2.054572582244873, "logps/rejected": -6.32058048248291, "loss": 0.6834, "nll_loss": 0.6741448640823364, "rewards/accuracies": 1.0, "rewards/chosen": -0.20545727014541626, "rewards/margins": 0.426600843667984, "rewards/rejected": -0.6320580840110779, "step": 4252 }, { "epoch": 11.644079397672828, "grad_norm": 3.2160327434539795, "learning_rate": 4.173972602739726e-07, "log_odds_chosen": 2.7048392295837402, "log_odds_ratio": -0.1390741765499115, "logits/chosen": 1.1363739967346191, "logits/rejected": 1.0604687929153442, "logps/chosen": -2.0027496814727783, "logps/rejected": -4.441249370574951, "loss": 0.5599, "nll_loss": 0.5459556579589844, "rewards/accuracies": 1.0, "rewards/chosen": -0.2002749741077423, "rewards/margins": 0.243849977850914, "rewards/rejected": -0.4441249370574951, "step": 4253 }, { "epoch": 11.646817248459959, "grad_norm": 3.1041128635406494, "learning_rate": 4.172602739726027e-07, "log_odds_chosen": 3.475482702255249, "log_odds_ratio": -0.11430864036083221, "logits/chosen": 0.9226994514465332, "logits/rejected": 0.7822835445404053, "logps/chosen": -1.9014592170715332, "logps/rejected": -5.193576812744141, "loss": 0.6268, "nll_loss": 0.6153585314750671, "rewards/accuracies": 1.0, "rewards/chosen": -0.19014592468738556, "rewards/margins": 0.3292117714881897, "rewards/rejected": -0.5193576812744141, "step": 4254 }, { "epoch": 11.649555099247092, "grad_norm": 3.536115884780884, "learning_rate": 4.171232876712328e-07, "log_odds_chosen": 3.949925422668457, "log_odds_ratio": -0.1280432492494583, "logits/chosen": 0.9626755714416504, "logits/rejected": 0.9956903457641602, "logps/chosen": -1.806960105895996, "logps/rejected": -5.558051109313965, "loss": 0.6518, "nll_loss": 0.6389575600624084, "rewards/accuracies": 1.0, "rewards/chosen": -0.1806960105895996, "rewards/margins": 0.37510907649993896, "rewards/rejected": -0.5558050274848938, "step": 4255 }, { "epoch": 11.652292950034223, "grad_norm": 4.2689080238342285, "learning_rate": 4.1698630136986303e-07, "log_odds_chosen": 2.7466907501220703, "log_odds_ratio": -0.17274177074432373, "logits/chosen": 1.3258098363876343, "logits/rejected": 1.2986987829208374, "logps/chosen": -2.324568748474121, "logps/rejected": -4.947524070739746, "loss": 0.6357, "nll_loss": 0.6184564828872681, "rewards/accuracies": 1.0, "rewards/chosen": -0.23245686292648315, "rewards/margins": 0.26229551434516907, "rewards/rejected": -0.4947524070739746, "step": 4256 }, { "epoch": 11.655030800821356, "grad_norm": 3.880086660385132, "learning_rate": 4.1684931506849313e-07, "log_odds_chosen": 2.783925771713257, "log_odds_ratio": -0.21084874868392944, "logits/chosen": 0.8985575437545776, "logits/rejected": 0.9897854328155518, "logps/chosen": -2.0462608337402344, "logps/rejected": -4.661407470703125, "loss": 0.6849, "nll_loss": 0.6638170480728149, "rewards/accuracies": 1.0, "rewards/chosen": -0.20462609827518463, "rewards/margins": 0.26151466369628906, "rewards/rejected": -0.4661407470703125, "step": 4257 }, { "epoch": 11.657768651608487, "grad_norm": 3.4467341899871826, "learning_rate": 4.167123287671233e-07, "log_odds_chosen": 3.5489342212677, "log_odds_ratio": -0.22272935509681702, "logits/chosen": 1.1495496034622192, "logits/rejected": 1.1633447408676147, "logps/chosen": -2.2914397716522217, "logps/rejected": -5.729312419891357, "loss": 0.6396, "nll_loss": 0.6173632144927979, "rewards/accuracies": 1.0, "rewards/chosen": -0.22914397716522217, "rewards/margins": 0.3437872529029846, "rewards/rejected": -0.5729312300682068, "step": 4258 }, { "epoch": 11.66050650239562, "grad_norm": 3.3718984127044678, "learning_rate": 4.1657534246575343e-07, "log_odds_chosen": 5.294527053833008, "log_odds_ratio": -0.2233744114637375, "logits/chosen": 0.9294480681419373, "logits/rejected": 1.0080622434616089, "logps/chosen": -1.6722018718719482, "logps/rejected": -6.788908004760742, "loss": 0.6352, "nll_loss": 0.6128531098365784, "rewards/accuracies": 0.875, "rewards/chosen": -0.16722019016742706, "rewards/margins": 0.5116705894470215, "rewards/rejected": -0.6788907647132874, "step": 4259 }, { "epoch": 11.663244353182751, "grad_norm": 5.2353386878967285, "learning_rate": 4.1643835616438353e-07, "log_odds_chosen": 3.9439549446105957, "log_odds_ratio": -0.43290257453918457, "logits/chosen": 0.9017075300216675, "logits/rejected": 0.8897711038589478, "logps/chosen": -2.42097806930542, "logps/rejected": -6.226652145385742, "loss": 0.6505, "nll_loss": 0.6072498559951782, "rewards/accuracies": 0.75, "rewards/chosen": -0.24209782481193542, "rewards/margins": 0.38056740164756775, "rewards/rejected": -0.6226652264595032, "step": 4260 }, { "epoch": 11.665982203969884, "grad_norm": 4.529051780700684, "learning_rate": 4.163013698630137e-07, "log_odds_chosen": 2.1274802684783936, "log_odds_ratio": -0.2901081442832947, "logits/chosen": 0.9289617538452148, "logits/rejected": 0.9239611625671387, "logps/chosen": -1.8564355373382568, "logps/rejected": -3.849743366241455, "loss": 0.5476, "nll_loss": 0.5185630917549133, "rewards/accuracies": 1.0, "rewards/chosen": -0.18564356863498688, "rewards/margins": 0.19933077692985535, "rewards/rejected": -0.3849743604660034, "step": 4261 }, { "epoch": 11.668720054757015, "grad_norm": 6.848706245422363, "learning_rate": 4.161643835616438e-07, "log_odds_chosen": 2.5113370418548584, "log_odds_ratio": -0.8334032893180847, "logits/chosen": 0.8734583854675293, "logits/rejected": 0.8515286445617676, "logps/chosen": -2.5009188652038574, "logps/rejected": -4.83079195022583, "loss": 0.8616, "nll_loss": 0.778303325176239, "rewards/accuracies": 0.75, "rewards/chosen": -0.25009188055992126, "rewards/margins": 0.23298731446266174, "rewards/rejected": -0.483079195022583, "step": 4262 }, { "epoch": 11.671457905544148, "grad_norm": 3.2754218578338623, "learning_rate": 4.16027397260274e-07, "log_odds_chosen": 2.309614658355713, "log_odds_ratio": -0.2011987566947937, "logits/chosen": 0.860668957233429, "logits/rejected": 0.8208112120628357, "logps/chosen": -2.0077168941497803, "logps/rejected": -4.140826225280762, "loss": 0.558, "nll_loss": 0.5379199385643005, "rewards/accuracies": 1.0, "rewards/chosen": -0.20077168941497803, "rewards/margins": 0.21331095695495605, "rewards/rejected": -0.4140826463699341, "step": 4263 }, { "epoch": 11.67419575633128, "grad_norm": 8.197751998901367, "learning_rate": 4.158904109589041e-07, "log_odds_chosen": 3.230072021484375, "log_odds_ratio": -0.28521689772605896, "logits/chosen": 0.9249933958053589, "logits/rejected": 0.9434016942977905, "logps/chosen": -2.532606840133667, "logps/rejected": -5.641109943389893, "loss": 0.6715, "nll_loss": 0.6430116891860962, "rewards/accuracies": 0.875, "rewards/chosen": -0.25326070189476013, "rewards/margins": 0.31085026264190674, "rewards/rejected": -0.5641109943389893, "step": 4264 }, { "epoch": 11.676933607118412, "grad_norm": 3.3507988452911377, "learning_rate": 4.157534246575342e-07, "log_odds_chosen": 2.0015227794647217, "log_odds_ratio": -0.22816121578216553, "logits/chosen": 0.870912492275238, "logits/rejected": 0.8820827007293701, "logps/chosen": -2.372112512588501, "logps/rejected": -4.222199440002441, "loss": 0.5146, "nll_loss": 0.4918128252029419, "rewards/accuracies": 1.0, "rewards/chosen": -0.23721125721931458, "rewards/margins": 0.1850086748600006, "rewards/rejected": -0.4222199618816376, "step": 4265 }, { "epoch": 11.679671457905544, "grad_norm": 5.276511192321777, "learning_rate": 4.156164383561644e-07, "log_odds_chosen": 1.8581948280334473, "log_odds_ratio": -0.3692823648452759, "logits/chosen": 1.0365482568740845, "logits/rejected": 0.9921628832817078, "logps/chosen": -1.872114658355713, "logps/rejected": -3.5885589122772217, "loss": 0.5963, "nll_loss": 0.5594214797019958, "rewards/accuracies": 0.875, "rewards/chosen": -0.18721148371696472, "rewards/margins": 0.1716444343328476, "rewards/rejected": -0.3588559031486511, "step": 4266 }, { "epoch": 11.682409308692677, "grad_norm": 3.7719271183013916, "learning_rate": 4.154794520547945e-07, "log_odds_chosen": 1.9335334300994873, "log_odds_ratio": -0.2551821768283844, "logits/chosen": 1.1746798753738403, "logits/rejected": 1.1817952394485474, "logps/chosen": -2.2274293899536133, "logps/rejected": -4.043200492858887, "loss": 0.525, "nll_loss": 0.49950599670410156, "rewards/accuracies": 1.0, "rewards/chosen": -0.2227429449558258, "rewards/margins": 0.18157707154750824, "rewards/rejected": -0.40432003140449524, "step": 4267 }, { "epoch": 11.685147159479808, "grad_norm": 3.6789305210113525, "learning_rate": 4.1534246575342464e-07, "log_odds_chosen": 3.821356773376465, "log_odds_ratio": -0.305392861366272, "logits/chosen": 0.7539166212081909, "logits/rejected": 0.7081252932548523, "logps/chosen": -2.426138162612915, "logps/rejected": -6.134869575500488, "loss": 0.583, "nll_loss": 0.5524287223815918, "rewards/accuracies": 0.875, "rewards/chosen": -0.24261382222175598, "rewards/margins": 0.3708730936050415, "rewards/rejected": -0.6134868860244751, "step": 4268 }, { "epoch": 11.68788501026694, "grad_norm": 3.7395379543304443, "learning_rate": 4.1520547945205474e-07, "log_odds_chosen": 3.093163251876831, "log_odds_ratio": -0.18528327345848083, "logits/chosen": 1.2398940324783325, "logits/rejected": 1.2194838523864746, "logps/chosen": -1.6731904745101929, "logps/rejected": -4.592210292816162, "loss": 0.5903, "nll_loss": 0.5717712044715881, "rewards/accuracies": 1.0, "rewards/chosen": -0.16731904447078705, "rewards/margins": 0.29190200567245483, "rewards/rejected": -0.4592210650444031, "step": 4269 }, { "epoch": 11.690622861054072, "grad_norm": 3.8860373497009277, "learning_rate": 4.1506849315068495e-07, "log_odds_chosen": 1.7824560403823853, "log_odds_ratio": -0.36498039960861206, "logits/chosen": 0.9560367465019226, "logits/rejected": 0.9706410765647888, "logps/chosen": -2.0766425132751465, "logps/rejected": -3.7437584400177, "loss": 0.607, "nll_loss": 0.5704644918441772, "rewards/accuracies": 0.875, "rewards/chosen": -0.20766426622867584, "rewards/margins": 0.16671158373355865, "rewards/rejected": -0.3743758201599121, "step": 4270 }, { "epoch": 11.693360711841205, "grad_norm": 6.81015157699585, "learning_rate": 4.1493150684931505e-07, "log_odds_chosen": 2.1604154109954834, "log_odds_ratio": -0.18956980109214783, "logits/chosen": 0.8719550371170044, "logits/rejected": 0.8340980410575867, "logps/chosen": -2.1053833961486816, "logps/rejected": -4.102444648742676, "loss": 0.5394, "nll_loss": 0.520416259765625, "rewards/accuracies": 1.0, "rewards/chosen": -0.2105383574962616, "rewards/margins": 0.19970610737800598, "rewards/rejected": -0.4102444648742676, "step": 4271 }, { "epoch": 11.696098562628336, "grad_norm": 5.422431468963623, "learning_rate": 4.1479452054794515e-07, "log_odds_chosen": 2.808925151824951, "log_odds_ratio": -0.3472231924533844, "logits/chosen": 0.933456301689148, "logits/rejected": 0.9253500699996948, "logps/chosen": -2.094822406768799, "logps/rejected": -4.604553699493408, "loss": 0.63, "nll_loss": 0.5952502489089966, "rewards/accuracies": 0.875, "rewards/chosen": -0.20948225259780884, "rewards/margins": 0.2509731352329254, "rewards/rejected": -0.46045535802841187, "step": 4272 }, { "epoch": 11.698836413415469, "grad_norm": 3.852881908416748, "learning_rate": 4.1465753424657535e-07, "log_odds_chosen": 1.249441146850586, "log_odds_ratio": -0.38531675934791565, "logits/chosen": 0.9918686151504517, "logits/rejected": 0.9893796443939209, "logps/chosen": -2.1980278491973877, "logps/rejected": -3.3327457904815674, "loss": 0.5422, "nll_loss": 0.5036937594413757, "rewards/accuracies": 0.75, "rewards/chosen": -0.21980278193950653, "rewards/margins": 0.11347181349992752, "rewards/rejected": -0.33327460289001465, "step": 4273 }, { "epoch": 11.7015742642026, "grad_norm": 3.2255210876464844, "learning_rate": 4.1452054794520545e-07, "log_odds_chosen": 3.22302508354187, "log_odds_ratio": -0.13850608468055725, "logits/chosen": 1.1154367923736572, "logits/rejected": 1.1753188371658325, "logps/chosen": -2.5417287349700928, "logps/rejected": -5.632366180419922, "loss": 0.6789, "nll_loss": 0.6650519967079163, "rewards/accuracies": 1.0, "rewards/chosen": -0.2541728615760803, "rewards/margins": 0.30906370282173157, "rewards/rejected": -0.5632365942001343, "step": 4274 }, { "epoch": 11.704312114989733, "grad_norm": 3.270871639251709, "learning_rate": 4.143835616438356e-07, "log_odds_chosen": 3.9359443187713623, "log_odds_ratio": -0.09299923479557037, "logits/chosen": 1.1202423572540283, "logits/rejected": 1.128115177154541, "logps/chosen": -1.5955885648727417, "logps/rejected": -5.3083696365356445, "loss": 0.5184, "nll_loss": 0.5090951919555664, "rewards/accuracies": 1.0, "rewards/chosen": -0.15955886244773865, "rewards/margins": 0.37127813696861267, "rewards/rejected": -0.5308369994163513, "step": 4275 }, { "epoch": 11.707049965776864, "grad_norm": 4.799901485443115, "learning_rate": 4.142465753424657e-07, "log_odds_chosen": 0.7079888582229614, "log_odds_ratio": -0.5613276362419128, "logits/chosen": 0.8441978693008423, "logits/rejected": 0.8501250743865967, "logps/chosen": -2.1925625801086426, "logps/rejected": -2.8198235034942627, "loss": 0.6181, "nll_loss": 0.5619239211082458, "rewards/accuracies": 0.75, "rewards/chosen": -0.21925625205039978, "rewards/margins": 0.06272611021995544, "rewards/rejected": -0.2819823622703552, "step": 4276 }, { "epoch": 11.709787816563997, "grad_norm": 3.7647287845611572, "learning_rate": 4.141095890410959e-07, "log_odds_chosen": 4.048902988433838, "log_odds_ratio": -0.20066514611244202, "logits/chosen": 1.059666395187378, "logits/rejected": 1.0906004905700684, "logps/chosen": -2.31782603263855, "logps/rejected": -6.215580940246582, "loss": 0.6197, "nll_loss": 0.5996460318565369, "rewards/accuracies": 0.875, "rewards/chosen": -0.23178258538246155, "rewards/margins": 0.38977548480033875, "rewards/rejected": -0.6215580701828003, "step": 4277 }, { "epoch": 11.71252566735113, "grad_norm": 4.653418064117432, "learning_rate": 4.13972602739726e-07, "log_odds_chosen": 1.3680813312530518, "log_odds_ratio": -0.6189385056495667, "logits/chosen": 0.9257611632347107, "logits/rejected": 1.0433909893035889, "logps/chosen": -2.4923834800720215, "logps/rejected": -3.7826743125915527, "loss": 0.6979, "nll_loss": 0.6359685063362122, "rewards/accuracies": 0.875, "rewards/chosen": -0.24923834204673767, "rewards/margins": 0.1290290802717209, "rewards/rejected": -0.37826740741729736, "step": 4278 }, { "epoch": 11.715263518138261, "grad_norm": 3.6109015941619873, "learning_rate": 4.138356164383561e-07, "log_odds_chosen": 1.5328257083892822, "log_odds_ratio": -0.2758359909057617, "logits/chosen": 1.1974824666976929, "logits/rejected": 1.220905065536499, "logps/chosen": -1.4818756580352783, "logps/rejected": -2.7893433570861816, "loss": 0.4636, "nll_loss": 0.4359731078147888, "rewards/accuracies": 1.0, "rewards/chosen": -0.1481875628232956, "rewards/margins": 0.13074679672718048, "rewards/rejected": -0.2789343595504761, "step": 4279 }, { "epoch": 11.718001368925394, "grad_norm": 5.814731121063232, "learning_rate": 4.136986301369863e-07, "log_odds_chosen": 2.946068286895752, "log_odds_ratio": -0.3662179410457611, "logits/chosen": 1.1959787607192993, "logits/rejected": 1.2260178327560425, "logps/chosen": -2.4696292877197266, "logps/rejected": -5.313432693481445, "loss": 0.5633, "nll_loss": 0.5267154574394226, "rewards/accuracies": 0.875, "rewards/chosen": -0.24696293473243713, "rewards/margins": 0.28438037633895874, "rewards/rejected": -0.5313432812690735, "step": 4280 }, { "epoch": 11.720739219712526, "grad_norm": 3.873565673828125, "learning_rate": 4.135616438356164e-07, "log_odds_chosen": 2.3969531059265137, "log_odds_ratio": -0.4210473895072937, "logits/chosen": 0.9898823499679565, "logits/rejected": 1.0222985744476318, "logps/chosen": -2.5295026302337646, "logps/rejected": -4.790072917938232, "loss": 0.6462, "nll_loss": 0.6041361689567566, "rewards/accuracies": 0.875, "rewards/chosen": -0.2529502809047699, "rewards/margins": 0.2260570377111435, "rewards/rejected": -0.4790073037147522, "step": 4281 }, { "epoch": 11.723477070499658, "grad_norm": 4.906185626983643, "learning_rate": 4.1342465753424656e-07, "log_odds_chosen": 1.9649879932403564, "log_odds_ratio": -0.2937866747379303, "logits/chosen": 0.9903396368026733, "logits/rejected": 1.0281181335449219, "logps/chosen": -2.329456090927124, "logps/rejected": -4.119658946990967, "loss": 0.4977, "nll_loss": 0.4682822823524475, "rewards/accuracies": 0.875, "rewards/chosen": -0.23294560611248016, "rewards/margins": 0.17902033030986786, "rewards/rejected": -0.4119659662246704, "step": 4282 }, { "epoch": 11.72621492128679, "grad_norm": 3.4826087951660156, "learning_rate": 4.1328767123287666e-07, "log_odds_chosen": 3.1048617362976074, "log_odds_ratio": -0.15082114934921265, "logits/chosen": 1.2083309888839722, "logits/rejected": 1.2925493717193604, "logps/chosen": -1.5859763622283936, "logps/rejected": -4.4386091232299805, "loss": 0.5252, "nll_loss": 0.5101186037063599, "rewards/accuracies": 1.0, "rewards/chosen": -0.15859761834144592, "rewards/margins": 0.285263329744339, "rewards/rejected": -0.4438609480857849, "step": 4283 }, { "epoch": 11.728952772073923, "grad_norm": 3.570425033569336, "learning_rate": 4.1315068493150686e-07, "log_odds_chosen": 2.416560649871826, "log_odds_ratio": -0.20268383622169495, "logits/chosen": 1.0565605163574219, "logits/rejected": 1.0018997192382812, "logps/chosen": -1.7676719427108765, "logps/rejected": -4.031435012817383, "loss": 0.594, "nll_loss": 0.5737677216529846, "rewards/accuracies": 0.875, "rewards/chosen": -0.17676720023155212, "rewards/margins": 0.22637633979320526, "rewards/rejected": -0.4031435251235962, "step": 4284 }, { "epoch": 11.731690622861054, "grad_norm": 3.0850353240966797, "learning_rate": 4.1301369863013696e-07, "log_odds_chosen": 2.6758804321289062, "log_odds_ratio": -0.18581408262252808, "logits/chosen": 0.7748359441757202, "logits/rejected": 0.7481218576431274, "logps/chosen": -1.7272032499313354, "logps/rejected": -4.219086647033691, "loss": 0.5684, "nll_loss": 0.549777626991272, "rewards/accuracies": 1.0, "rewards/chosen": -0.17272032797336578, "rewards/margins": 0.24918833374977112, "rewards/rejected": -0.4219086766242981, "step": 4285 }, { "epoch": 11.734428473648187, "grad_norm": 9.739706993103027, "learning_rate": 4.1287671232876706e-07, "log_odds_chosen": 1.2130147218704224, "log_odds_ratio": -0.49845317006111145, "logits/chosen": 1.096566915512085, "logits/rejected": 1.0327975749969482, "logps/chosen": -2.4327611923217773, "logps/rejected": -3.507376194000244, "loss": 0.6302, "nll_loss": 0.5803723335266113, "rewards/accuracies": 0.75, "rewards/chosen": -0.24327611923217773, "rewards/margins": 0.10746148973703384, "rewards/rejected": -0.35073763132095337, "step": 4286 }, { "epoch": 11.737166324435318, "grad_norm": 5.889184951782227, "learning_rate": 4.1273972602739727e-07, "log_odds_chosen": 2.4681618213653564, "log_odds_ratio": -0.5243184566497803, "logits/chosen": 1.1634186506271362, "logits/rejected": 1.2153247594833374, "logps/chosen": -3.0831499099731445, "logps/rejected": -5.4198479652404785, "loss": 0.6948, "nll_loss": 0.6423801183700562, "rewards/accuracies": 0.75, "rewards/chosen": -0.3083150386810303, "rewards/margins": 0.23366978764533997, "rewards/rejected": -0.5419847965240479, "step": 4287 }, { "epoch": 11.739904175222451, "grad_norm": 3.829760789871216, "learning_rate": 4.1260273972602737e-07, "log_odds_chosen": 1.3084802627563477, "log_odds_ratio": -0.31121569871902466, "logits/chosen": 1.0162135362625122, "logits/rejected": 0.9713586568832397, "logps/chosen": -2.1391639709472656, "logps/rejected": -3.326873302459717, "loss": 0.5631, "nll_loss": 0.5320083498954773, "rewards/accuracies": 1.0, "rewards/chosen": -0.21391639113426208, "rewards/margins": 0.11877096444368362, "rewards/rejected": -0.3326873779296875, "step": 4288 }, { "epoch": 11.742642026009582, "grad_norm": 3.609907627105713, "learning_rate": 4.124657534246575e-07, "log_odds_chosen": 2.13834810256958, "log_odds_ratio": -0.24858234822750092, "logits/chosen": 1.2076892852783203, "logits/rejected": 1.066544532775879, "logps/chosen": -1.9555660486221313, "logps/rejected": -3.956599473953247, "loss": 0.7125, "nll_loss": 0.6875924468040466, "rewards/accuracies": 0.875, "rewards/chosen": -0.1955566108226776, "rewards/margins": 0.20010335743427277, "rewards/rejected": -0.3956599533557892, "step": 4289 }, { "epoch": 11.745379876796715, "grad_norm": 5.846634387969971, "learning_rate": 4.1232876712328767e-07, "log_odds_chosen": 2.5919313430786133, "log_odds_ratio": -0.1361694037914276, "logits/chosen": 0.7949544787406921, "logits/rejected": 0.6994709968566895, "logps/chosen": -1.4270775318145752, "logps/rejected": -3.686473846435547, "loss": 0.5076, "nll_loss": 0.49398690462112427, "rewards/accuracies": 1.0, "rewards/chosen": -0.14270775020122528, "rewards/margins": 0.22593963146209717, "rewards/rejected": -0.36864739656448364, "step": 4290 }, { "epoch": 11.748117727583846, "grad_norm": 5.617756366729736, "learning_rate": 4.121917808219178e-07, "log_odds_chosen": 1.504889965057373, "log_odds_ratio": -0.38861554861068726, "logits/chosen": 1.1282683610916138, "logits/rejected": 1.1371815204620361, "logps/chosen": -2.208083152770996, "logps/rejected": -3.656996011734009, "loss": 0.6071, "nll_loss": 0.5682315826416016, "rewards/accuracies": 0.875, "rewards/chosen": -0.22080832719802856, "rewards/margins": 0.14489126205444336, "rewards/rejected": -0.3656996190547943, "step": 4291 }, { "epoch": 11.75085557837098, "grad_norm": 3.359832763671875, "learning_rate": 4.120547945205479e-07, "log_odds_chosen": 1.973928689956665, "log_odds_ratio": -0.28151240944862366, "logits/chosen": 0.9878218173980713, "logits/rejected": 0.9954941272735596, "logps/chosen": -2.1119937896728516, "logps/rejected": -3.991529941558838, "loss": 0.6098, "nll_loss": 0.5816372632980347, "rewards/accuracies": 0.875, "rewards/chosen": -0.21119940280914307, "rewards/margins": 0.18795360624790192, "rewards/rejected": -0.3991529941558838, "step": 4292 }, { "epoch": 11.75359342915811, "grad_norm": 3.2612335681915283, "learning_rate": 4.11917808219178e-07, "log_odds_chosen": 2.3093948364257812, "log_odds_ratio": -0.161067932844162, "logits/chosen": 1.0183104276657104, "logits/rejected": 0.979825496673584, "logps/chosen": -1.7285466194152832, "logps/rejected": -3.81764554977417, "loss": 0.5197, "nll_loss": 0.5036026835441589, "rewards/accuracies": 1.0, "rewards/chosen": -0.17285466194152832, "rewards/margins": 0.20890989899635315, "rewards/rejected": -0.38176456093788147, "step": 4293 }, { "epoch": 11.756331279945243, "grad_norm": 4.72724723815918, "learning_rate": 4.1178082191780823e-07, "log_odds_chosen": 3.1793100833892822, "log_odds_ratio": -0.1722535789012909, "logits/chosen": 0.7707986235618591, "logits/rejected": 0.6564323902130127, "logps/chosen": -1.7685604095458984, "logps/rejected": -4.7488579750061035, "loss": 0.4946, "nll_loss": 0.47734057903289795, "rewards/accuracies": 1.0, "rewards/chosen": -0.17685604095458984, "rewards/margins": 0.2980297803878784, "rewards/rejected": -0.47488582134246826, "step": 4294 }, { "epoch": 11.759069130732374, "grad_norm": 3.8652350902557373, "learning_rate": 4.116438356164383e-07, "log_odds_chosen": 2.641444206237793, "log_odds_ratio": -0.18626464903354645, "logits/chosen": 1.2542365789413452, "logits/rejected": 1.2527198791503906, "logps/chosen": -1.67677903175354, "logps/rejected": -4.1364030838012695, "loss": 0.4935, "nll_loss": 0.4749111533164978, "rewards/accuracies": 1.0, "rewards/chosen": -0.16767790913581848, "rewards/margins": 0.24596238136291504, "rewards/rejected": -0.4136403203010559, "step": 4295 }, { "epoch": 11.761806981519507, "grad_norm": 3.712704658508301, "learning_rate": 4.115068493150685e-07, "log_odds_chosen": 1.0171689987182617, "log_odds_ratio": -0.34317725896835327, "logits/chosen": 0.9903900027275085, "logits/rejected": 0.9454989433288574, "logps/chosen": -1.4614967107772827, "logps/rejected": -2.30245304107666, "loss": 0.5201, "nll_loss": 0.4858209490776062, "rewards/accuracies": 1.0, "rewards/chosen": -0.14614969491958618, "rewards/margins": 0.08409563452005386, "rewards/rejected": -0.23024532198905945, "step": 4296 }, { "epoch": 11.764544832306639, "grad_norm": 6.324388027191162, "learning_rate": 4.1136986301369863e-07, "log_odds_chosen": 0.686931848526001, "log_odds_ratio": -0.747867226600647, "logits/chosen": 1.174208402633667, "logits/rejected": 1.2087082862854004, "logps/chosen": -2.720205307006836, "logps/rejected": -3.323033332824707, "loss": 0.6078, "nll_loss": 0.5329717397689819, "rewards/accuracies": 0.75, "rewards/chosen": -0.27202051877975464, "rewards/margins": 0.06028282269835472, "rewards/rejected": -0.33230334520339966, "step": 4297 }, { "epoch": 11.767282683093772, "grad_norm": 3.921583890914917, "learning_rate": 4.112328767123288e-07, "log_odds_chosen": 3.496098041534424, "log_odds_ratio": -0.09900137782096863, "logits/chosen": 1.3346214294433594, "logits/rejected": 1.3904836177825928, "logps/chosen": -2.257436752319336, "logps/rejected": -5.593316555023193, "loss": 0.5076, "nll_loss": 0.49765080213546753, "rewards/accuracies": 1.0, "rewards/chosen": -0.22574365139007568, "rewards/margins": 0.33358803391456604, "rewards/rejected": -0.5593316555023193, "step": 4298 }, { "epoch": 11.770020533880903, "grad_norm": 4.789371013641357, "learning_rate": 4.110958904109589e-07, "log_odds_chosen": 1.2843775749206543, "log_odds_ratio": -0.5705746412277222, "logits/chosen": 0.785856306552887, "logits/rejected": 0.6936314702033997, "logps/chosen": -2.0313587188720703, "logps/rejected": -3.238327980041504, "loss": 0.5387, "nll_loss": 0.4816446900367737, "rewards/accuracies": 0.75, "rewards/chosen": -0.20313586294651031, "rewards/margins": 0.12069690972566605, "rewards/rejected": -0.32383278012275696, "step": 4299 }, { "epoch": 11.772758384668036, "grad_norm": 3.2066080570220947, "learning_rate": 4.10958904109589e-07, "log_odds_chosen": 2.8839054107666016, "log_odds_ratio": -0.18699580430984497, "logits/chosen": 1.0853413343429565, "logits/rejected": 1.084544062614441, "logps/chosen": -1.7009594440460205, "logps/rejected": -4.369447231292725, "loss": 0.544, "nll_loss": 0.525301456451416, "rewards/accuracies": 1.0, "rewards/chosen": -0.17009595036506653, "rewards/margins": 0.26684874296188354, "rewards/rejected": -0.43694472312927246, "step": 4300 }, { "epoch": 11.775496235455167, "grad_norm": 3.1322638988494873, "learning_rate": 4.108219178082192e-07, "log_odds_chosen": 5.245621204376221, "log_odds_ratio": -0.15049704909324646, "logits/chosen": 1.0457868576049805, "logits/rejected": 1.047971487045288, "logps/chosen": -2.109879970550537, "logps/rejected": -7.218158721923828, "loss": 0.63, "nll_loss": 0.6149042248725891, "rewards/accuracies": 1.0, "rewards/chosen": -0.21098798513412476, "rewards/margins": 0.5108278393745422, "rewards/rejected": -0.721815824508667, "step": 4301 }, { "epoch": 11.7782340862423, "grad_norm": 3.780799627304077, "learning_rate": 4.106849315068493e-07, "log_odds_chosen": 2.679985523223877, "log_odds_ratio": -0.19825443625450134, "logits/chosen": 1.0195324420928955, "logits/rejected": 1.053520679473877, "logps/chosen": -1.8514935970306396, "logps/rejected": -4.38776969909668, "loss": 0.6183, "nll_loss": 0.598505973815918, "rewards/accuracies": 1.0, "rewards/chosen": -0.18514934182167053, "rewards/margins": 0.2536276578903198, "rewards/rejected": -0.43877699971199036, "step": 4302 }, { "epoch": 11.780971937029431, "grad_norm": 4.920316696166992, "learning_rate": 4.1054794520547944e-07, "log_odds_chosen": 2.7050185203552246, "log_odds_ratio": -0.284870445728302, "logits/chosen": 1.141242504119873, "logits/rejected": 1.07012939453125, "logps/chosen": -2.414294719696045, "logps/rejected": -4.994243621826172, "loss": 0.6648, "nll_loss": 0.6362818479537964, "rewards/accuracies": 0.875, "rewards/chosen": -0.24142947793006897, "rewards/margins": 0.2579948902130127, "rewards/rejected": -0.4994243085384369, "step": 4303 }, { "epoch": 11.783709787816564, "grad_norm": 3.2579104900360107, "learning_rate": 4.104109589041096e-07, "log_odds_chosen": 2.330901861190796, "log_odds_ratio": -0.18856535851955414, "logits/chosen": 1.058562994003296, "logits/rejected": 0.9604724645614624, "logps/chosen": -1.5136139392852783, "logps/rejected": -3.648559093475342, "loss": 0.5178, "nll_loss": 0.49890637397766113, "rewards/accuracies": 0.875, "rewards/chosen": -0.1513614058494568, "rewards/margins": 0.21349450945854187, "rewards/rejected": -0.36485591530799866, "step": 4304 }, { "epoch": 11.786447638603697, "grad_norm": 3.510054588317871, "learning_rate": 4.1027397260273974e-07, "log_odds_chosen": 2.2252752780914307, "log_odds_ratio": -0.2850605249404907, "logits/chosen": 0.7435871362686157, "logits/rejected": 0.7375761270523071, "logps/chosen": -2.1083357334136963, "logps/rejected": -4.226363182067871, "loss": 0.554, "nll_loss": 0.5254500508308411, "rewards/accuracies": 0.875, "rewards/chosen": -0.2108335644006729, "rewards/margins": 0.21180272102355957, "rewards/rejected": -0.4226363003253937, "step": 4305 }, { "epoch": 11.789185489390828, "grad_norm": 4.061932563781738, "learning_rate": 4.1013698630136984e-07, "log_odds_chosen": 2.6745481491088867, "log_odds_ratio": -0.2417777180671692, "logits/chosen": 1.0574259757995605, "logits/rejected": 1.1008321046829224, "logps/chosen": -2.7832186222076416, "logps/rejected": -5.212923049926758, "loss": 0.7569, "nll_loss": 0.7327371835708618, "rewards/accuracies": 1.0, "rewards/chosen": -0.27832186222076416, "rewards/margins": 0.24297046661376953, "rewards/rejected": -0.5212923288345337, "step": 4306 }, { "epoch": 11.791923340177961, "grad_norm": 3.6375887393951416, "learning_rate": 4.0999999999999994e-07, "log_odds_chosen": 4.235283374786377, "log_odds_ratio": -0.08541341125965118, "logits/chosen": 1.0471352338790894, "logits/rejected": 0.9839320182800293, "logps/chosen": -1.7689142227172852, "logps/rejected": -5.759968280792236, "loss": 0.4886, "nll_loss": 0.48004454374313354, "rewards/accuracies": 1.0, "rewards/chosen": -0.17689141631126404, "rewards/margins": 0.399105429649353, "rewards/rejected": -0.5759968757629395, "step": 4307 }, { "epoch": 11.794661190965092, "grad_norm": 3.490813970565796, "learning_rate": 4.0986301369863014e-07, "log_odds_chosen": 3.773200273513794, "log_odds_ratio": -0.18290963768959045, "logits/chosen": 1.0800198316574097, "logits/rejected": 1.1370869874954224, "logps/chosen": -1.998555302619934, "logps/rejected": -5.635066032409668, "loss": 0.568, "nll_loss": 0.549748420715332, "rewards/accuracies": 0.875, "rewards/chosen": -0.19985553622245789, "rewards/margins": 0.3636511266231537, "rewards/rejected": -0.5635066628456116, "step": 4308 }, { "epoch": 11.797399041752225, "grad_norm": 3.7790794372558594, "learning_rate": 4.0972602739726024e-07, "log_odds_chosen": 1.5642873048782349, "log_odds_ratio": -0.2425399124622345, "logits/chosen": 1.1165242195129395, "logits/rejected": 1.0705742835998535, "logps/chosen": -2.0146350860595703, "logps/rejected": -3.4525556564331055, "loss": 0.527, "nll_loss": 0.5027489066123962, "rewards/accuracies": 1.0, "rewards/chosen": -0.201463520526886, "rewards/margins": 0.143792062997818, "rewards/rejected": -0.345255583524704, "step": 4309 }, { "epoch": 11.800136892539356, "grad_norm": 3.2794525623321533, "learning_rate": 4.095890410958904e-07, "log_odds_chosen": 3.173241376876831, "log_odds_ratio": -0.12004569917917252, "logits/chosen": 1.0963622331619263, "logits/rejected": 1.0394415855407715, "logps/chosen": -1.4342138767242432, "logps/rejected": -4.321832656860352, "loss": 0.506, "nll_loss": 0.4939921200275421, "rewards/accuracies": 1.0, "rewards/chosen": -0.14342138171195984, "rewards/margins": 0.2887618839740753, "rewards/rejected": -0.43218326568603516, "step": 4310 }, { "epoch": 11.80287474332649, "grad_norm": 3.518986463546753, "learning_rate": 4.0945205479452055e-07, "log_odds_chosen": 2.658642292022705, "log_odds_ratio": -0.2382175773382187, "logits/chosen": 1.2542827129364014, "logits/rejected": 1.1990934610366821, "logps/chosen": -1.6909449100494385, "logps/rejected": -4.193569183349609, "loss": 0.6581, "nll_loss": 0.6342586278915405, "rewards/accuracies": 0.875, "rewards/chosen": -0.1690945029258728, "rewards/margins": 0.25026243925094604, "rewards/rejected": -0.41935694217681885, "step": 4311 }, { "epoch": 11.80561259411362, "grad_norm": 5.148906707763672, "learning_rate": 4.0931506849315065e-07, "log_odds_chosen": 1.8883553743362427, "log_odds_ratio": -0.5795881748199463, "logits/chosen": 0.9791513681411743, "logits/rejected": 1.0088433027267456, "logps/chosen": -2.6904516220092773, "logps/rejected": -4.526042461395264, "loss": 0.6249, "nll_loss": 0.5669714212417603, "rewards/accuracies": 0.75, "rewards/chosen": -0.2690451741218567, "rewards/margins": 0.1835590898990631, "rewards/rejected": -0.4526042342185974, "step": 4312 }, { "epoch": 11.808350444900753, "grad_norm": 3.5477468967437744, "learning_rate": 4.091780821917808e-07, "log_odds_chosen": 3.4345474243164062, "log_odds_ratio": -0.2052481770515442, "logits/chosen": 1.1754035949707031, "logits/rejected": 1.1711302995681763, "logps/chosen": -2.413480758666992, "logps/rejected": -5.7510809898376465, "loss": 0.6147, "nll_loss": 0.5941616296768188, "rewards/accuracies": 1.0, "rewards/chosen": -0.24134807288646698, "rewards/margins": 0.3337600827217102, "rewards/rejected": -0.5751081705093384, "step": 4313 }, { "epoch": 11.811088295687885, "grad_norm": 3.7020673751831055, "learning_rate": 4.090410958904109e-07, "log_odds_chosen": 1.5869231224060059, "log_odds_ratio": -0.35834309458732605, "logits/chosen": 1.0826306343078613, "logits/rejected": 0.9989784955978394, "logps/chosen": -1.8503386974334717, "logps/rejected": -3.312823534011841, "loss": 0.5376, "nll_loss": 0.5017313957214355, "rewards/accuracies": 0.875, "rewards/chosen": -0.1850338578224182, "rewards/margins": 0.1462484896183014, "rewards/rejected": -0.3312823474407196, "step": 4314 }, { "epoch": 11.813826146475018, "grad_norm": 3.4353866577148438, "learning_rate": 4.089041095890411e-07, "log_odds_chosen": 2.338874340057373, "log_odds_ratio": -0.2727346122264862, "logits/chosen": 1.0266129970550537, "logits/rejected": 1.0174823999404907, "logps/chosen": -1.7411997318267822, "logps/rejected": -3.921957015991211, "loss": 0.6147, "nll_loss": 0.5874007344245911, "rewards/accuracies": 1.0, "rewards/chosen": -0.1741199791431427, "rewards/margins": 0.21807576715946198, "rewards/rejected": -0.3921957015991211, "step": 4315 }, { "epoch": 11.816563997262149, "grad_norm": 3.8133349418640137, "learning_rate": 4.087671232876712e-07, "log_odds_chosen": 1.5831577777862549, "log_odds_ratio": -0.22786690294742584, "logits/chosen": 0.9246606826782227, "logits/rejected": 0.8943082094192505, "logps/chosen": -2.210252046585083, "logps/rejected": -3.660244941711426, "loss": 0.5426, "nll_loss": 0.5198166370391846, "rewards/accuracies": 1.0, "rewards/chosen": -0.22102519869804382, "rewards/margins": 0.14499928057193756, "rewards/rejected": -0.3660244941711426, "step": 4316 }, { "epoch": 11.819301848049282, "grad_norm": 4.394146919250488, "learning_rate": 4.0863013698630135e-07, "log_odds_chosen": 2.3518459796905518, "log_odds_ratio": -0.18156170845031738, "logits/chosen": 0.9233721494674683, "logits/rejected": 0.9616836309432983, "logps/chosen": -1.865767240524292, "logps/rejected": -3.9953789710998535, "loss": 0.5099, "nll_loss": 0.4917343556880951, "rewards/accuracies": 1.0, "rewards/chosen": -0.1865767389535904, "rewards/margins": 0.2129611372947693, "rewards/rejected": -0.3995378911495209, "step": 4317 }, { "epoch": 11.822039698836413, "grad_norm": 3.5092785358428955, "learning_rate": 4.084931506849315e-07, "log_odds_chosen": 2.7274787425994873, "log_odds_ratio": -0.218217134475708, "logits/chosen": 0.6940057277679443, "logits/rejected": 0.6231129765510559, "logps/chosen": -1.2593494653701782, "logps/rejected": -3.7345614433288574, "loss": 0.5038, "nll_loss": 0.4819563031196594, "rewards/accuracies": 0.875, "rewards/chosen": -0.12593494355678558, "rewards/margins": 0.24752119183540344, "rewards/rejected": -0.3734561502933502, "step": 4318 }, { "epoch": 11.824777549623546, "grad_norm": 3.6255698204040527, "learning_rate": 4.083561643835616e-07, "log_odds_chosen": 3.0136351585388184, "log_odds_ratio": -0.15397174656391144, "logits/chosen": 0.9330976009368896, "logits/rejected": 0.9697905778884888, "logps/chosen": -1.5133326053619385, "logps/rejected": -4.282377243041992, "loss": 0.5984, "nll_loss": 0.5829991698265076, "rewards/accuracies": 1.0, "rewards/chosen": -0.1513332724571228, "rewards/margins": 0.276904433965683, "rewards/rejected": -0.4282377064228058, "step": 4319 }, { "epoch": 11.827515400410677, "grad_norm": 3.2863595485687256, "learning_rate": 4.0821917808219176e-07, "log_odds_chosen": 1.9948456287384033, "log_odds_ratio": -0.19737693667411804, "logits/chosen": 1.0149116516113281, "logits/rejected": 0.956356406211853, "logps/chosen": -1.295723557472229, "logps/rejected": -3.02553653717041, "loss": 0.5509, "nll_loss": 0.5312052369117737, "rewards/accuracies": 1.0, "rewards/chosen": -0.12957236170768738, "rewards/margins": 0.17298126220703125, "rewards/rejected": -0.302553653717041, "step": 4320 }, { "epoch": 11.83025325119781, "grad_norm": 3.868741035461426, "learning_rate": 4.0808219178082186e-07, "log_odds_chosen": 1.585196614265442, "log_odds_ratio": -0.28036537766456604, "logits/chosen": 1.144321322441101, "logits/rejected": 1.1315689086914062, "logps/chosen": -1.6598727703094482, "logps/rejected": -3.0854034423828125, "loss": 0.5004, "nll_loss": 0.4723535180091858, "rewards/accuracies": 1.0, "rewards/chosen": -0.1659872829914093, "rewards/margins": 0.14255309104919434, "rewards/rejected": -0.30854034423828125, "step": 4321 }, { "epoch": 11.832991101984941, "grad_norm": 6.171474456787109, "learning_rate": 4.0794520547945206e-07, "log_odds_chosen": 1.1584364175796509, "log_odds_ratio": -0.5568090677261353, "logits/chosen": 1.2090089321136475, "logits/rejected": 1.1613633632659912, "logps/chosen": -2.2677955627441406, "logps/rejected": -3.3618545532226562, "loss": 0.5935, "nll_loss": 0.537797212600708, "rewards/accuracies": 0.75, "rewards/chosen": -0.22677956521511078, "rewards/margins": 0.10940591245889664, "rewards/rejected": -0.3361854553222656, "step": 4322 }, { "epoch": 11.835728952772074, "grad_norm": 3.0842678546905518, "learning_rate": 4.0780821917808216e-07, "log_odds_chosen": 4.043439865112305, "log_odds_ratio": -0.17575164139270782, "logits/chosen": 1.1229578256607056, "logits/rejected": 1.180678129196167, "logps/chosen": -1.9632549285888672, "logps/rejected": -5.873865127563477, "loss": 0.5937, "nll_loss": 0.5761247277259827, "rewards/accuracies": 1.0, "rewards/chosen": -0.19632549583911896, "rewards/margins": 0.3910609483718872, "rewards/rejected": -0.5873864889144897, "step": 4323 }, { "epoch": 11.838466803559205, "grad_norm": 3.652421474456787, "learning_rate": 4.076712328767123e-07, "log_odds_chosen": 2.142399549484253, "log_odds_ratio": -0.27317357063293457, "logits/chosen": 0.949295699596405, "logits/rejected": 0.9598350524902344, "logps/chosen": -1.6398191452026367, "logps/rejected": -3.597651481628418, "loss": 0.5063, "nll_loss": 0.47901466488838196, "rewards/accuracies": 0.875, "rewards/chosen": -0.16398191452026367, "rewards/margins": 0.19578325748443604, "rewards/rejected": -0.3597651720046997, "step": 4324 }, { "epoch": 11.841204654346338, "grad_norm": 4.061814308166504, "learning_rate": 4.0753424657534246e-07, "log_odds_chosen": 3.3643620014190674, "log_odds_ratio": -0.16535305976867676, "logits/chosen": 0.7635752558708191, "logits/rejected": 0.753667950630188, "logps/chosen": -1.7829228639602661, "logps/rejected": -4.948339462280273, "loss": 0.5491, "nll_loss": 0.5325989127159119, "rewards/accuracies": 1.0, "rewards/chosen": -0.17829227447509766, "rewards/margins": 0.3165416419506073, "rewards/rejected": -0.49483391642570496, "step": 4325 }, { "epoch": 11.84394250513347, "grad_norm": 3.2421112060546875, "learning_rate": 4.0739726027397256e-07, "log_odds_chosen": 2.6297571659088135, "log_odds_ratio": -0.14765219390392303, "logits/chosen": 1.1224063634872437, "logits/rejected": 1.0996894836425781, "logps/chosen": -1.9815117120742798, "logps/rejected": -4.386326789855957, "loss": 0.5244, "nll_loss": 0.5096692442893982, "rewards/accuracies": 1.0, "rewards/chosen": -0.19815117120742798, "rewards/margins": 0.24048146605491638, "rewards/rejected": -0.43863266706466675, "step": 4326 }, { "epoch": 11.846680355920602, "grad_norm": 3.524683713912964, "learning_rate": 4.072602739726027e-07, "log_odds_chosen": 2.8616480827331543, "log_odds_ratio": -0.1373329907655716, "logits/chosen": 0.8781355619430542, "logits/rejected": 0.8727812170982361, "logps/chosen": -2.025846004486084, "logps/rejected": -4.546809196472168, "loss": 0.6082, "nll_loss": 0.5944843292236328, "rewards/accuracies": 1.0, "rewards/chosen": -0.20258457958698273, "rewards/margins": 0.2520963251590729, "rewards/rejected": -0.4546809196472168, "step": 4327 }, { "epoch": 11.849418206707734, "grad_norm": 4.079573154449463, "learning_rate": 4.0712328767123287e-07, "log_odds_chosen": 4.020214080810547, "log_odds_ratio": -0.1322423666715622, "logits/chosen": 0.9149259328842163, "logits/rejected": 0.9434306025505066, "logps/chosen": -3.436584949493408, "logps/rejected": -7.376838684082031, "loss": 0.7116, "nll_loss": 0.6983636617660522, "rewards/accuracies": 1.0, "rewards/chosen": -0.3436585068702698, "rewards/margins": 0.3940253257751465, "rewards/rejected": -0.737683892250061, "step": 4328 }, { "epoch": 11.852156057494867, "grad_norm": 6.665925025939941, "learning_rate": 4.06986301369863e-07, "log_odds_chosen": 0.9796187281608582, "log_odds_ratio": -0.4101892113685608, "logits/chosen": 1.0349761247634888, "logits/rejected": 0.8932996392250061, "logps/chosen": -2.705379009246826, "logps/rejected": -3.54050350189209, "loss": 0.696, "nll_loss": 0.6550158262252808, "rewards/accuracies": 0.875, "rewards/chosen": -0.2705379128456116, "rewards/margins": 0.08351244777441025, "rewards/rejected": -0.3540503680706024, "step": 4329 }, { "epoch": 11.854893908281998, "grad_norm": 4.178832530975342, "learning_rate": 4.068493150684931e-07, "log_odds_chosen": 1.7386374473571777, "log_odds_ratio": -0.31167545914649963, "logits/chosen": 1.0262383222579956, "logits/rejected": 1.080068588256836, "logps/chosen": -2.466982841491699, "logps/rejected": -4.13938045501709, "loss": 0.5664, "nll_loss": 0.535216748714447, "rewards/accuracies": 0.875, "rewards/chosen": -0.2466982901096344, "rewards/margins": 0.16723981499671936, "rewards/rejected": -0.41393810510635376, "step": 4330 }, { "epoch": 11.85763175906913, "grad_norm": 8.214484214782715, "learning_rate": 4.0671232876712327e-07, "log_odds_chosen": 4.079183101654053, "log_odds_ratio": -0.31605327129364014, "logits/chosen": 1.1465141773223877, "logits/rejected": 1.128625750541687, "logps/chosen": -2.09621524810791, "logps/rejected": -5.855201244354248, "loss": 0.6248, "nll_loss": 0.5932186841964722, "rewards/accuracies": 0.875, "rewards/chosen": -0.20962153375148773, "rewards/margins": 0.3758985996246338, "rewards/rejected": -0.5855201482772827, "step": 4331 }, { "epoch": 11.860369609856264, "grad_norm": 4.309758186340332, "learning_rate": 4.065753424657534e-07, "log_odds_chosen": 2.2910408973693848, "log_odds_ratio": -0.34052687883377075, "logits/chosen": 1.0918920040130615, "logits/rejected": 1.0540854930877686, "logps/chosen": -1.8534259796142578, "logps/rejected": -3.970440626144409, "loss": 0.5727, "nll_loss": 0.5386705994606018, "rewards/accuracies": 0.875, "rewards/chosen": -0.18534260988235474, "rewards/margins": 0.21170146763324738, "rewards/rejected": -0.39704403281211853, "step": 4332 }, { "epoch": 11.863107460643395, "grad_norm": 3.5202138423919678, "learning_rate": 4.064383561643835e-07, "log_odds_chosen": 2.03433895111084, "log_odds_ratio": -0.29138702154159546, "logits/chosen": 1.2146254777908325, "logits/rejected": 1.222495436668396, "logps/chosen": -1.856391429901123, "logps/rejected": -3.6829164028167725, "loss": 0.5066, "nll_loss": 0.4774639308452606, "rewards/accuracies": 0.875, "rewards/chosen": -0.1856391429901123, "rewards/margins": 0.18265250325202942, "rewards/rejected": -0.3682916462421417, "step": 4333 }, { "epoch": 11.865845311430528, "grad_norm": 5.064676284790039, "learning_rate": 4.063013698630137e-07, "log_odds_chosen": 1.3770010471343994, "log_odds_ratio": -0.38706955313682556, "logits/chosen": 1.2856414318084717, "logits/rejected": 1.2338359355926514, "logps/chosen": -2.5384368896484375, "logps/rejected": -3.8047595024108887, "loss": 0.6065, "nll_loss": 0.5678426623344421, "rewards/accuracies": 0.875, "rewards/chosen": -0.2538437247276306, "rewards/margins": 0.12663227319717407, "rewards/rejected": -0.3804759979248047, "step": 4334 }, { "epoch": 11.868583162217659, "grad_norm": 3.532716989517212, "learning_rate": 4.0616438356164383e-07, "log_odds_chosen": 4.430674076080322, "log_odds_ratio": -0.2131849229335785, "logits/chosen": 1.0266215801239014, "logits/rejected": 1.0779657363891602, "logps/chosen": -2.010124444961548, "logps/rejected": -6.342718601226807, "loss": 0.6008, "nll_loss": 0.5794699192047119, "rewards/accuracies": 0.875, "rewards/chosen": -0.20101244747638702, "rewards/margins": 0.43325942754745483, "rewards/rejected": -0.6342718601226807, "step": 4335 }, { "epoch": 11.871321013004792, "grad_norm": 3.391671657562256, "learning_rate": 4.06027397260274e-07, "log_odds_chosen": 1.9245035648345947, "log_odds_ratio": -0.2517911493778229, "logits/chosen": 1.2301275730133057, "logits/rejected": 1.1708780527114868, "logps/chosen": -1.3682228326797485, "logps/rejected": -3.0575828552246094, "loss": 0.4856, "nll_loss": 0.4604421854019165, "rewards/accuracies": 1.0, "rewards/chosen": -0.13682228326797485, "rewards/margins": 0.16893598437309265, "rewards/rejected": -0.3057582676410675, "step": 4336 }, { "epoch": 11.874058863791923, "grad_norm": 3.6031296253204346, "learning_rate": 4.058904109589041e-07, "log_odds_chosen": 2.719630718231201, "log_odds_ratio": -0.18224424123764038, "logits/chosen": 1.1856231689453125, "logits/rejected": 1.2384047508239746, "logps/chosen": -2.1045174598693848, "logps/rejected": -4.667472839355469, "loss": 0.5808, "nll_loss": 0.5626007318496704, "rewards/accuracies": 1.0, "rewards/chosen": -0.21045175194740295, "rewards/margins": 0.2562956213951111, "rewards/rejected": -0.46674734354019165, "step": 4337 }, { "epoch": 11.876796714579056, "grad_norm": 3.7237014770507812, "learning_rate": 4.0575342465753423e-07, "log_odds_chosen": 3.6521270275115967, "log_odds_ratio": -0.16326341032981873, "logits/chosen": 1.212178349494934, "logits/rejected": 1.2726118564605713, "logps/chosen": -1.4496307373046875, "logps/rejected": -4.8632588386535645, "loss": 0.4644, "nll_loss": 0.4480499029159546, "rewards/accuracies": 0.875, "rewards/chosen": -0.1449630707502365, "rewards/margins": 0.3413628339767456, "rewards/rejected": -0.4863259196281433, "step": 4338 }, { "epoch": 11.879534565366187, "grad_norm": 3.181061267852783, "learning_rate": 4.056164383561644e-07, "log_odds_chosen": 2.680044174194336, "log_odds_ratio": -0.37439918518066406, "logits/chosen": 1.3727033138275146, "logits/rejected": 1.3549550771713257, "logps/chosen": -2.3881185054779053, "logps/rejected": -4.986021995544434, "loss": 0.5972, "nll_loss": 0.5597638487815857, "rewards/accuracies": 0.875, "rewards/chosen": -0.23881185054779053, "rewards/margins": 0.2597903311252594, "rewards/rejected": -0.4986021816730499, "step": 4339 }, { "epoch": 11.88227241615332, "grad_norm": 4.4822187423706055, "learning_rate": 4.054794520547945e-07, "log_odds_chosen": 2.1536648273468018, "log_odds_ratio": -0.31562331318855286, "logits/chosen": 0.7812575101852417, "logits/rejected": 0.6974118947982788, "logps/chosen": -2.058840751647949, "logps/rejected": -4.033307075500488, "loss": 0.5668, "nll_loss": 0.5352813005447388, "rewards/accuracies": 1.0, "rewards/chosen": -0.20588409900665283, "rewards/margins": 0.19744664430618286, "rewards/rejected": -0.4033307433128357, "step": 4340 }, { "epoch": 11.885010266940451, "grad_norm": 2.8291432857513428, "learning_rate": 4.0534246575342463e-07, "log_odds_chosen": 3.2470650672912598, "log_odds_ratio": -0.2073923796415329, "logits/chosen": 0.9888430833816528, "logits/rejected": 1.0230998992919922, "logps/chosen": -1.6031608581542969, "logps/rejected": -4.676692008972168, "loss": 0.5196, "nll_loss": 0.4988339841365814, "rewards/accuracies": 0.875, "rewards/chosen": -0.1603160798549652, "rewards/margins": 0.30735310912132263, "rewards/rejected": -0.46766918897628784, "step": 4341 }, { "epoch": 11.887748117727584, "grad_norm": 3.9623165130615234, "learning_rate": 4.052054794520548e-07, "log_odds_chosen": 2.356750249862671, "log_odds_ratio": -0.19558700919151306, "logits/chosen": 0.945813775062561, "logits/rejected": 0.8214671015739441, "logps/chosen": -1.4459388256072998, "logps/rejected": -3.5946784019470215, "loss": 0.5025, "nll_loss": 0.4829797148704529, "rewards/accuracies": 1.0, "rewards/chosen": -0.14459389448165894, "rewards/margins": 0.21487395465373993, "rewards/rejected": -0.35946786403656006, "step": 4342 }, { "epoch": 11.890485968514716, "grad_norm": 3.4390716552734375, "learning_rate": 4.0506849315068494e-07, "log_odds_chosen": 1.6386423110961914, "log_odds_ratio": -0.2881094217300415, "logits/chosen": 0.8933255672454834, "logits/rejected": 0.7959396839141846, "logps/chosen": -1.6561775207519531, "logps/rejected": -3.0701494216918945, "loss": 0.5041, "nll_loss": 0.47529104351997375, "rewards/accuracies": 0.875, "rewards/chosen": -0.16561776399612427, "rewards/margins": 0.14139720797538757, "rewards/rejected": -0.30701494216918945, "step": 4343 }, { "epoch": 11.893223819301848, "grad_norm": 3.4660844802856445, "learning_rate": 4.0493150684931504e-07, "log_odds_chosen": 2.547271251678467, "log_odds_ratio": -0.1623011976480484, "logits/chosen": 1.0854597091674805, "logits/rejected": 1.0518145561218262, "logps/chosen": -1.8605644702911377, "logps/rejected": -4.070932388305664, "loss": 0.494, "nll_loss": 0.4777795672416687, "rewards/accuracies": 1.0, "rewards/chosen": -0.186056450009346, "rewards/margins": 0.22103680670261383, "rewards/rejected": -0.4070932865142822, "step": 4344 }, { "epoch": 11.89596167008898, "grad_norm": 3.34554123878479, "learning_rate": 4.0479452054794514e-07, "log_odds_chosen": 3.749906539916992, "log_odds_ratio": -0.0665663480758667, "logits/chosen": 1.4235312938690186, "logits/rejected": 1.4139761924743652, "logps/chosen": -1.510237455368042, "logps/rejected": -4.9484663009643555, "loss": 0.5036, "nll_loss": 0.49689924716949463, "rewards/accuracies": 1.0, "rewards/chosen": -0.1510237604379654, "rewards/margins": 0.3438229262828827, "rewards/rejected": -0.4948466420173645, "step": 4345 }, { "epoch": 11.898699520876113, "grad_norm": 5.209425926208496, "learning_rate": 4.0465753424657534e-07, "log_odds_chosen": 1.930124282836914, "log_odds_ratio": -0.34728285670280457, "logits/chosen": 0.8599154353141785, "logits/rejected": 0.7430790662765503, "logps/chosen": -2.004570722579956, "logps/rejected": -3.7238712310791016, "loss": 0.6113, "nll_loss": 0.5765942335128784, "rewards/accuracies": 0.875, "rewards/chosen": -0.20045706629753113, "rewards/margins": 0.17193004488945007, "rewards/rejected": -0.3723871111869812, "step": 4346 }, { "epoch": 11.901437371663244, "grad_norm": 3.5845980644226074, "learning_rate": 4.0452054794520544e-07, "log_odds_chosen": 2.2292346954345703, "log_odds_ratio": -0.22516947984695435, "logits/chosen": 1.0940771102905273, "logits/rejected": 1.0696864128112793, "logps/chosen": -2.202920913696289, "logps/rejected": -4.242987155914307, "loss": 0.5387, "nll_loss": 0.5161995887756348, "rewards/accuracies": 1.0, "rewards/chosen": -0.2202921062707901, "rewards/margins": 0.2040066123008728, "rewards/rejected": -0.4242987334728241, "step": 4347 }, { "epoch": 11.904175222450377, "grad_norm": 3.418027877807617, "learning_rate": 4.043835616438356e-07, "log_odds_chosen": 2.7375268936157227, "log_odds_ratio": -0.224867045879364, "logits/chosen": 1.2060556411743164, "logits/rejected": 1.2198870182037354, "logps/chosen": -1.4011644124984741, "logps/rejected": -3.929481029510498, "loss": 0.518, "nll_loss": 0.4955041706562042, "rewards/accuracies": 1.0, "rewards/chosen": -0.14011645317077637, "rewards/margins": 0.25283166766166687, "rewards/rejected": -0.39294812083244324, "step": 4348 }, { "epoch": 11.906913073237508, "grad_norm": 5.352930545806885, "learning_rate": 4.0424657534246574e-07, "log_odds_chosen": 2.635904312133789, "log_odds_ratio": -0.1367400586605072, "logits/chosen": 1.0327427387237549, "logits/rejected": 1.0793559551239014, "logps/chosen": -2.1082870960235596, "logps/rejected": -4.608750343322754, "loss": 0.7449, "nll_loss": 0.7312500476837158, "rewards/accuracies": 1.0, "rewards/chosen": -0.21082869172096252, "rewards/margins": 0.25004637241363525, "rewards/rejected": -0.4608750641345978, "step": 4349 }, { "epoch": 11.90965092402464, "grad_norm": 3.3212177753448486, "learning_rate": 4.041095890410959e-07, "log_odds_chosen": 2.9732046127319336, "log_odds_ratio": -0.1458117663860321, "logits/chosen": 1.084097146987915, "logits/rejected": 1.1623047590255737, "logps/chosen": -2.0282092094421387, "logps/rejected": -4.8314385414123535, "loss": 0.4785, "nll_loss": 0.4638822376728058, "rewards/accuracies": 1.0, "rewards/chosen": -0.20282094180583954, "rewards/margins": 0.2803229093551636, "rewards/rejected": -0.4831438660621643, "step": 4350 }, { "epoch": 11.912388774811772, "grad_norm": 3.7330384254455566, "learning_rate": 4.03972602739726e-07, "log_odds_chosen": 2.387716293334961, "log_odds_ratio": -0.14000466465950012, "logits/chosen": 1.095012903213501, "logits/rejected": 1.0854501724243164, "logps/chosen": -1.8986966609954834, "logps/rejected": -4.081188678741455, "loss": 0.4716, "nll_loss": 0.4576147794723511, "rewards/accuracies": 1.0, "rewards/chosen": -0.18986965715885162, "rewards/margins": 0.21824923157691956, "rewards/rejected": -0.4081189036369324, "step": 4351 }, { "epoch": 11.915126625598905, "grad_norm": 4.327950477600098, "learning_rate": 4.038356164383561e-07, "log_odds_chosen": 2.9890403747558594, "log_odds_ratio": -0.2425147294998169, "logits/chosen": 0.9196645617485046, "logits/rejected": 0.9007697105407715, "logps/chosen": -1.96633780002594, "logps/rejected": -4.80671501159668, "loss": 0.5429, "nll_loss": 0.5186212062835693, "rewards/accuracies": 1.0, "rewards/chosen": -0.19663378596305847, "rewards/margins": 0.284037709236145, "rewards/rejected": -0.4806714653968811, "step": 4352 }, { "epoch": 11.917864476386036, "grad_norm": 4.830723762512207, "learning_rate": 4.036986301369863e-07, "log_odds_chosen": 1.8625036478042603, "log_odds_ratio": -0.29184970259666443, "logits/chosen": 1.3001707792282104, "logits/rejected": 1.2928799390792847, "logps/chosen": -1.8248381614685059, "logps/rejected": -3.5585527420043945, "loss": 0.6083, "nll_loss": 0.5791341662406921, "rewards/accuracies": 1.0, "rewards/chosen": -0.18248380720615387, "rewards/margins": 0.17337146401405334, "rewards/rejected": -0.3558552861213684, "step": 4353 }, { "epoch": 11.92060232717317, "grad_norm": 6.599592208862305, "learning_rate": 4.035616438356164e-07, "log_odds_chosen": 4.04550838470459, "log_odds_ratio": -0.12186142802238464, "logits/chosen": 1.1498069763183594, "logits/rejected": 1.1679186820983887, "logps/chosen": -2.8797717094421387, "logps/rejected": -6.796319484710693, "loss": 0.7325, "nll_loss": 0.7203420400619507, "rewards/accuracies": 1.0, "rewards/chosen": -0.2879771590232849, "rewards/margins": 0.3916547894477844, "rewards/rejected": -0.6796319484710693, "step": 4354 }, { "epoch": 11.923340177960302, "grad_norm": 4.019500255584717, "learning_rate": 4.0342465753424655e-07, "log_odds_chosen": 5.019716262817383, "log_odds_ratio": -0.07998579740524292, "logits/chosen": 0.99568110704422, "logits/rejected": 1.0070346593856812, "logps/chosen": -2.2316839694976807, "logps/rejected": -6.963844299316406, "loss": 0.6864, "nll_loss": 0.6783915758132935, "rewards/accuracies": 1.0, "rewards/chosen": -0.22316838800907135, "rewards/margins": 0.47321605682373047, "rewards/rejected": -0.6963844299316406, "step": 4355 }, { "epoch": 11.926078028747433, "grad_norm": 3.7653005123138428, "learning_rate": 4.032876712328767e-07, "log_odds_chosen": 3.398831367492676, "log_odds_ratio": -0.13522212207317352, "logits/chosen": 0.9742818474769592, "logits/rejected": 0.9394227862358093, "logps/chosen": -2.631765842437744, "logps/rejected": -5.930595874786377, "loss": 0.6112, "nll_loss": 0.5976853966712952, "rewards/accuracies": 1.0, "rewards/chosen": -0.2631765902042389, "rewards/margins": 0.32988297939300537, "rewards/rejected": -0.5930595397949219, "step": 4356 }, { "epoch": 11.928815879534564, "grad_norm": 5.903000831604004, "learning_rate": 4.0315068493150685e-07, "log_odds_chosen": 0.1617838740348816, "log_odds_ratio": -0.8017080426216125, "logits/chosen": 0.8881599307060242, "logits/rejected": 0.924710750579834, "logps/chosen": -2.386536121368408, "logps/rejected": -2.5080695152282715, "loss": 0.7289, "nll_loss": 0.6487002968788147, "rewards/accuracies": 0.625, "rewards/chosen": -0.23865360021591187, "rewards/margins": 0.01215334888547659, "rewards/rejected": -0.2508069574832916, "step": 4357 }, { "epoch": 11.931553730321697, "grad_norm": 5.6224565505981445, "learning_rate": 4.0301369863013695e-07, "log_odds_chosen": 2.194934129714966, "log_odds_ratio": -0.33768871426582336, "logits/chosen": 1.0056209564208984, "logits/rejected": 0.9685001969337463, "logps/chosen": -2.3625268936157227, "logps/rejected": -4.337465286254883, "loss": 0.7158, "nll_loss": 0.6820685267448425, "rewards/accuracies": 0.875, "rewards/chosen": -0.23625269532203674, "rewards/margins": 0.19749383628368378, "rewards/rejected": -0.4337465167045593, "step": 4358 }, { "epoch": 11.93429158110883, "grad_norm": 3.6431612968444824, "learning_rate": 4.028767123287671e-07, "log_odds_chosen": 3.555293083190918, "log_odds_ratio": -0.17946627736091614, "logits/chosen": 1.133263111114502, "logits/rejected": 1.0456172227859497, "logps/chosen": -1.849949836730957, "logps/rejected": -5.0539703369140625, "loss": 0.6433, "nll_loss": 0.6253576278686523, "rewards/accuracies": 1.0, "rewards/chosen": -0.18499498069286346, "rewards/margins": 0.3204020857810974, "rewards/rejected": -0.5053970813751221, "step": 4359 }, { "epoch": 11.937029431895962, "grad_norm": 5.0264177322387695, "learning_rate": 4.0273972602739726e-07, "log_odds_chosen": 3.9517128467559814, "log_odds_ratio": -0.263536274433136, "logits/chosen": 1.2000560760498047, "logits/rejected": 1.1903083324432373, "logps/chosen": -2.389108657836914, "logps/rejected": -6.214742660522461, "loss": 0.7482, "nll_loss": 0.7218928933143616, "rewards/accuracies": 0.875, "rewards/chosen": -0.23891086876392365, "rewards/margins": 0.38256341218948364, "rewards/rejected": -0.6214742660522461, "step": 4360 }, { "epoch": 11.939767282683095, "grad_norm": 4.23534631729126, "learning_rate": 4.0260273972602736e-07, "log_odds_chosen": 3.2410244941711426, "log_odds_ratio": -0.4903063476085663, "logits/chosen": 0.9393109083175659, "logits/rejected": 0.9508470296859741, "logps/chosen": -2.2624704837799072, "logps/rejected": -5.339113235473633, "loss": 0.6494, "nll_loss": 0.6003994345664978, "rewards/accuracies": 0.875, "rewards/chosen": -0.22624704241752625, "rewards/margins": 0.30766427516937256, "rewards/rejected": -0.5339113473892212, "step": 4361 }, { "epoch": 11.942505133470226, "grad_norm": 3.336488962173462, "learning_rate": 4.024657534246575e-07, "log_odds_chosen": 3.803908109664917, "log_odds_ratio": -0.08250787109136581, "logits/chosen": 1.0688475370407104, "logits/rejected": 1.0412184000015259, "logps/chosen": -1.4019393920898438, "logps/rejected": -4.807668685913086, "loss": 0.5628, "nll_loss": 0.5545779466629028, "rewards/accuracies": 1.0, "rewards/chosen": -0.14019393920898438, "rewards/margins": 0.34057289361953735, "rewards/rejected": -0.48076683282852173, "step": 4362 }, { "epoch": 11.945242984257359, "grad_norm": 3.3491666316986084, "learning_rate": 4.0232876712328766e-07, "log_odds_chosen": 2.0668351650238037, "log_odds_ratio": -0.1822134554386139, "logits/chosen": 1.0567967891693115, "logits/rejected": 0.9865438342094421, "logps/chosen": -1.6381268501281738, "logps/rejected": -3.484269380569458, "loss": 0.497, "nll_loss": 0.47879767417907715, "rewards/accuracies": 1.0, "rewards/chosen": -0.16381271183490753, "rewards/margins": 0.18461422622203827, "rewards/rejected": -0.3484269082546234, "step": 4363 }, { "epoch": 11.94798083504449, "grad_norm": 3.425750732421875, "learning_rate": 4.021917808219178e-07, "log_odds_chosen": 1.942533254623413, "log_odds_ratio": -0.23682540655136108, "logits/chosen": 1.172716736793518, "logits/rejected": 1.0503968000411987, "logps/chosen": -1.2376424074172974, "logps/rejected": -2.908521890640259, "loss": 0.5101, "nll_loss": 0.4864082336425781, "rewards/accuracies": 0.875, "rewards/chosen": -0.12376424670219421, "rewards/margins": 0.16708795726299286, "rewards/rejected": -0.2908521890640259, "step": 4364 }, { "epoch": 11.950718685831623, "grad_norm": 3.1542887687683105, "learning_rate": 4.020547945205479e-07, "log_odds_chosen": 2.9283838272094727, "log_odds_ratio": -0.13780662417411804, "logits/chosen": 1.1792975664138794, "logits/rejected": 1.2312507629394531, "logps/chosen": -1.4816406965255737, "logps/rejected": -4.182840347290039, "loss": 0.5139, "nll_loss": 0.5001465678215027, "rewards/accuracies": 1.0, "rewards/chosen": -0.1481640636920929, "rewards/margins": 0.27011990547180176, "rewards/rejected": -0.41828399896621704, "step": 4365 }, { "epoch": 11.953456536618754, "grad_norm": 3.9265761375427246, "learning_rate": 4.0191780821917806e-07, "log_odds_chosen": 0.9706476330757141, "log_odds_ratio": -0.3495543301105499, "logits/chosen": 1.1512640714645386, "logits/rejected": 1.0799108743667603, "logps/chosen": -1.2605353593826294, "logps/rejected": -2.0179190635681152, "loss": 0.437, "nll_loss": 0.4020325839519501, "rewards/accuracies": 1.0, "rewards/chosen": -0.12605354189872742, "rewards/margins": 0.07573836296796799, "rewards/rejected": -0.2017918974161148, "step": 4366 }, { "epoch": 11.956194387405887, "grad_norm": 3.4855895042419434, "learning_rate": 4.017808219178082e-07, "log_odds_chosen": 2.249314785003662, "log_odds_ratio": -0.23618505895137787, "logits/chosen": 0.9824383854866028, "logits/rejected": 0.87052983045578, "logps/chosen": -1.2321367263793945, "logps/rejected": -3.2400975227355957, "loss": 0.5106, "nll_loss": 0.4869513511657715, "rewards/accuracies": 1.0, "rewards/chosen": -0.12321366369724274, "rewards/margins": 0.20079606771469116, "rewards/rejected": -0.3240097165107727, "step": 4367 }, { "epoch": 11.958932238193018, "grad_norm": 3.9823086261749268, "learning_rate": 4.016438356164383e-07, "log_odds_chosen": 2.596328020095825, "log_odds_ratio": -0.08896314352750778, "logits/chosen": 1.3013193607330322, "logits/rejected": 1.320654034614563, "logps/chosen": -2.2884740829467773, "logps/rejected": -4.710030555725098, "loss": 0.5883, "nll_loss": 0.5793754458427429, "rewards/accuracies": 1.0, "rewards/chosen": -0.2288474291563034, "rewards/margins": 0.24215558171272278, "rewards/rejected": -0.471002995967865, "step": 4368 }, { "epoch": 11.961670088980151, "grad_norm": 5.139809608459473, "learning_rate": 4.0150684931506847e-07, "log_odds_chosen": 1.5095200538635254, "log_odds_ratio": -0.28060951828956604, "logits/chosen": 0.9263194799423218, "logits/rejected": 0.8351245522499084, "logps/chosen": -1.6976702213287354, "logps/rejected": -3.0052924156188965, "loss": 0.5156, "nll_loss": 0.4875035881996155, "rewards/accuracies": 1.0, "rewards/chosen": -0.16976703703403473, "rewards/margins": 0.1307622194290161, "rewards/rejected": -0.30052924156188965, "step": 4369 }, { "epoch": 11.964407939767282, "grad_norm": 4.049803256988525, "learning_rate": 4.013698630136986e-07, "log_odds_chosen": 3.299954891204834, "log_odds_ratio": -0.12983691692352295, "logits/chosen": 1.1689547300338745, "logits/rejected": 1.1975455284118652, "logps/chosen": -2.0228540897369385, "logps/rejected": -5.155845642089844, "loss": 0.5676, "nll_loss": 0.5545822978019714, "rewards/accuracies": 1.0, "rewards/chosen": -0.20228540897369385, "rewards/margins": 0.31329914927482605, "rewards/rejected": -0.5155845880508423, "step": 4370 }, { "epoch": 11.967145790554415, "grad_norm": 3.591470956802368, "learning_rate": 4.0123287671232877e-07, "log_odds_chosen": 3.1372761726379395, "log_odds_ratio": -0.10236530750989914, "logits/chosen": 0.9840586185455322, "logits/rejected": 1.0119792222976685, "logps/chosen": -2.0571372509002686, "logps/rejected": -4.911148548126221, "loss": 0.5532, "nll_loss": 0.5430046916007996, "rewards/accuracies": 1.0, "rewards/chosen": -0.20571374893188477, "rewards/margins": 0.2854011356830597, "rewards/rejected": -0.49111485481262207, "step": 4371 }, { "epoch": 11.969883641341546, "grad_norm": 3.751145362854004, "learning_rate": 4.0109589041095887e-07, "log_odds_chosen": 0.9205887913703918, "log_odds_ratio": -0.5156442523002625, "logits/chosen": 1.256550669670105, "logits/rejected": 1.1726188659667969, "logps/chosen": -1.8200920820236206, "logps/rejected": -2.6409239768981934, "loss": 0.5087, "nll_loss": 0.4571668803691864, "rewards/accuracies": 0.875, "rewards/chosen": -0.18200920522212982, "rewards/margins": 0.08208319544792175, "rewards/rejected": -0.2640923857688904, "step": 4372 }, { "epoch": 11.97262149212868, "grad_norm": 4.354587554931641, "learning_rate": 4.00958904109589e-07, "log_odds_chosen": 2.4713897705078125, "log_odds_ratio": -0.18278074264526367, "logits/chosen": 0.794208288192749, "logits/rejected": 0.7424375414848328, "logps/chosen": -1.4713574647903442, "logps/rejected": -3.6409525871276855, "loss": 0.6038, "nll_loss": 0.5855216383934021, "rewards/accuracies": 1.0, "rewards/chosen": -0.14713574945926666, "rewards/margins": 0.21695956587791443, "rewards/rejected": -0.3640953004360199, "step": 4373 }, { "epoch": 11.97535934291581, "grad_norm": 3.772512197494507, "learning_rate": 4.008219178082192e-07, "log_odds_chosen": 2.059335470199585, "log_odds_ratio": -0.24954372644424438, "logits/chosen": 0.9995229840278625, "logits/rejected": 0.9232926368713379, "logps/chosen": -1.4087729454040527, "logps/rejected": -3.2716240882873535, "loss": 0.5004, "nll_loss": 0.47547534108161926, "rewards/accuracies": 1.0, "rewards/chosen": -0.14087730646133423, "rewards/margins": 0.1862851083278656, "rewards/rejected": -0.32716241478919983, "step": 4374 }, { "epoch": 11.978097193702943, "grad_norm": 3.7027645111083984, "learning_rate": 4.006849315068493e-07, "log_odds_chosen": 0.9706728458404541, "log_odds_ratio": -0.42388033866882324, "logits/chosen": 0.7715018391609192, "logits/rejected": 0.7576650977134705, "logps/chosen": -2.27874755859375, "logps/rejected": -3.201631546020508, "loss": 0.6546, "nll_loss": 0.6121878623962402, "rewards/accuracies": 0.875, "rewards/chosen": -0.227874755859375, "rewards/margins": 0.09228841215372086, "rewards/rejected": -0.32016316056251526, "step": 4375 }, { "epoch": 11.980835044490075, "grad_norm": 3.396232843399048, "learning_rate": 4.0054794520547943e-07, "log_odds_chosen": 1.6032185554504395, "log_odds_ratio": -0.2560875117778778, "logits/chosen": 0.9426146149635315, "logits/rejected": 0.9497417211532593, "logps/chosen": -1.7932155132293701, "logps/rejected": -3.2328500747680664, "loss": 0.4939, "nll_loss": 0.4683019816875458, "rewards/accuracies": 0.875, "rewards/chosen": -0.17932157218456268, "rewards/margins": 0.14396344125270844, "rewards/rejected": -0.3232850134372711, "step": 4376 }, { "epoch": 11.983572895277208, "grad_norm": 3.3025171756744385, "learning_rate": 4.004109589041096e-07, "log_odds_chosen": 3.8772687911987305, "log_odds_ratio": -0.17180754244327545, "logits/chosen": 1.0560253858566284, "logits/rejected": 1.0530085563659668, "logps/chosen": -1.7870243787765503, "logps/rejected": -5.511824607849121, "loss": 0.4704, "nll_loss": 0.4532429277896881, "rewards/accuracies": 1.0, "rewards/chosen": -0.1787024438381195, "rewards/margins": 0.37248003482818604, "rewards/rejected": -0.5511823892593384, "step": 4377 }, { "epoch": 11.986310746064339, "grad_norm": 4.287156105041504, "learning_rate": 4.0027397260273973e-07, "log_odds_chosen": 1.8905117511749268, "log_odds_ratio": -0.4118572473526001, "logits/chosen": 0.9610854387283325, "logits/rejected": 0.9400666952133179, "logps/chosen": -1.8372156620025635, "logps/rejected": -3.570159912109375, "loss": 0.5186, "nll_loss": 0.4774271249771118, "rewards/accuracies": 0.75, "rewards/chosen": -0.18372154235839844, "rewards/margins": 0.17329442501068115, "rewards/rejected": -0.3570159673690796, "step": 4378 }, { "epoch": 11.989048596851472, "grad_norm": 3.753845453262329, "learning_rate": 4.0013698630136983e-07, "log_odds_chosen": 3.0515427589416504, "log_odds_ratio": -0.26812902092933655, "logits/chosen": 0.8715479969978333, "logits/rejected": 0.86817467212677, "logps/chosen": -2.1127467155456543, "logps/rejected": -5.093296051025391, "loss": 0.6543, "nll_loss": 0.6275010704994202, "rewards/accuracies": 0.875, "rewards/chosen": -0.21127468347549438, "rewards/margins": 0.2980549931526184, "rewards/rejected": -0.5093296766281128, "step": 4379 }, { "epoch": 11.991786447638603, "grad_norm": 3.835463285446167, "learning_rate": 4e-07, "log_odds_chosen": 2.9103729724884033, "log_odds_ratio": -0.15043935179710388, "logits/chosen": 1.279371976852417, "logits/rejected": 1.3071184158325195, "logps/chosen": -1.733396291732788, "logps/rejected": -4.391026020050049, "loss": 0.6239, "nll_loss": 0.6088425517082214, "rewards/accuracies": 1.0, "rewards/chosen": -0.1733396351337433, "rewards/margins": 0.26576298475265503, "rewards/rejected": -0.4391026198863983, "step": 4380 }, { "epoch": 11.994524298425736, "grad_norm": 3.4732301235198975, "learning_rate": 3.9986301369863013e-07, "log_odds_chosen": 3.355120897293091, "log_odds_ratio": -0.18339891731739044, "logits/chosen": 0.8915603756904602, "logits/rejected": 0.8011782765388489, "logps/chosen": -1.5534461736679077, "logps/rejected": -4.638893127441406, "loss": 0.6941, "nll_loss": 0.6757725477218628, "rewards/accuracies": 0.875, "rewards/chosen": -0.15534460544586182, "rewards/margins": 0.30854472517967224, "rewards/rejected": -0.46388933062553406, "step": 4381 }, { "epoch": 11.997262149212869, "grad_norm": 3.2723426818847656, "learning_rate": 3.9972602739726023e-07, "log_odds_chosen": 4.309733867645264, "log_odds_ratio": -0.09451573342084885, "logits/chosen": 1.094635248184204, "logits/rejected": 1.0906448364257812, "logps/chosen": -1.6354128122329712, "logps/rejected": -5.711957931518555, "loss": 0.5789, "nll_loss": 0.5694855451583862, "rewards/accuracies": 1.0, "rewards/chosen": -0.1635412722826004, "rewards/margins": 0.4076545834541321, "rewards/rejected": -0.5711958408355713, "step": 4382 }, { "epoch": 12.0, "grad_norm": 4.0079851150512695, "learning_rate": 3.995890410958904e-07, "log_odds_chosen": 3.2153944969177246, "log_odds_ratio": -0.14423251152038574, "logits/chosen": 1.2356739044189453, "logits/rejected": 1.3026137351989746, "logps/chosen": -2.3833236694335938, "logps/rejected": -5.424385070800781, "loss": 0.6902, "nll_loss": 0.6757497191429138, "rewards/accuracies": 1.0, "rewards/chosen": -0.2383323609828949, "rewards/margins": 0.3041061758995056, "rewards/rejected": -0.5424385070800781, "step": 4383 }, { "epoch": 12.002737850787133, "grad_norm": 3.8705642223358154, "learning_rate": 3.9945205479452054e-07, "log_odds_chosen": 2.5380685329437256, "log_odds_ratio": -0.15427929162979126, "logits/chosen": 0.9419925808906555, "logits/rejected": 0.8700923919677734, "logps/chosen": -1.4568864107131958, "logps/rejected": -3.7525506019592285, "loss": 0.5164, "nll_loss": 0.5009475946426392, "rewards/accuracies": 1.0, "rewards/chosen": -0.14568865299224854, "rewards/margins": 0.22956642508506775, "rewards/rejected": -0.3752550780773163, "step": 4384 }, { "epoch": 12.005475701574264, "grad_norm": 3.707602024078369, "learning_rate": 3.993150684931507e-07, "log_odds_chosen": 2.390556812286377, "log_odds_ratio": -0.19101651012897491, "logits/chosen": 1.3049449920654297, "logits/rejected": 1.3046599626541138, "logps/chosen": -1.919301986694336, "logps/rejected": -4.098659515380859, "loss": 0.4907, "nll_loss": 0.4715600311756134, "rewards/accuracies": 1.0, "rewards/chosen": -0.19193018972873688, "rewards/margins": 0.21793577075004578, "rewards/rejected": -0.40986594557762146, "step": 4385 }, { "epoch": 12.008213552361397, "grad_norm": 3.561807155609131, "learning_rate": 3.991780821917808e-07, "log_odds_chosen": 5.252617835998535, "log_odds_ratio": -0.23490972816944122, "logits/chosen": 1.249634861946106, "logits/rejected": 1.2817742824554443, "logps/chosen": -1.966707706451416, "logps/rejected": -7.099720478057861, "loss": 0.6372, "nll_loss": 0.6137361526489258, "rewards/accuracies": 0.875, "rewards/chosen": -0.1966707706451416, "rewards/margins": 0.5133013129234314, "rewards/rejected": -0.7099721431732178, "step": 4386 }, { "epoch": 12.010951403148528, "grad_norm": 4.151988983154297, "learning_rate": 3.9904109589041094e-07, "log_odds_chosen": 2.3307900428771973, "log_odds_ratio": -0.3041973412036896, "logits/chosen": 1.3233487606048584, "logits/rejected": 1.3572404384613037, "logps/chosen": -2.0657923221588135, "logps/rejected": -4.251246452331543, "loss": 0.5459, "nll_loss": 0.5155045986175537, "rewards/accuracies": 0.875, "rewards/chosen": -0.20657923817634583, "rewards/margins": 0.21854542195796967, "rewards/rejected": -0.4251246452331543, "step": 4387 }, { "epoch": 12.013689253935661, "grad_norm": 7.437351703643799, "learning_rate": 3.989041095890411e-07, "log_odds_chosen": 1.5823873281478882, "log_odds_ratio": -0.5560802221298218, "logits/chosen": 1.1930660009384155, "logits/rejected": 1.164006233215332, "logps/chosen": -2.5699472427368164, "logps/rejected": -3.957101345062256, "loss": 0.6269, "nll_loss": 0.5712758898735046, "rewards/accuracies": 0.75, "rewards/chosen": -0.25699472427368164, "rewards/margins": 0.13871538639068604, "rewards/rejected": -0.3957101106643677, "step": 4388 }, { "epoch": 12.016427104722792, "grad_norm": 3.49397873878479, "learning_rate": 3.987671232876712e-07, "log_odds_chosen": 2.837158679962158, "log_odds_ratio": -0.11640128493309021, "logits/chosen": 1.2018874883651733, "logits/rejected": 1.2591105699539185, "logps/chosen": -2.2273433208465576, "logps/rejected": -4.894343852996826, "loss": 0.5381, "nll_loss": 0.5264714956283569, "rewards/accuracies": 1.0, "rewards/chosen": -0.22273434698581696, "rewards/margins": 0.26670005917549133, "rewards/rejected": -0.4894343912601471, "step": 4389 }, { "epoch": 12.019164955509925, "grad_norm": 3.7412078380584717, "learning_rate": 3.9863013698630134e-07, "log_odds_chosen": 2.000283718109131, "log_odds_ratio": -0.1875668615102768, "logits/chosen": 0.9164313077926636, "logits/rejected": 0.9162635207176208, "logps/chosen": -2.1760287284851074, "logps/rejected": -4.007706642150879, "loss": 0.5473, "nll_loss": 0.5285698175430298, "rewards/accuracies": 1.0, "rewards/chosen": -0.21760287880897522, "rewards/margins": 0.18316775560379028, "rewards/rejected": -0.4007706642150879, "step": 4390 }, { "epoch": 12.021902806297057, "grad_norm": 4.055078029632568, "learning_rate": 3.984931506849315e-07, "log_odds_chosen": 4.1092143058776855, "log_odds_ratio": -0.23016877472400665, "logits/chosen": 1.0276641845703125, "logits/rejected": 1.0642359256744385, "logps/chosen": -2.6799204349517822, "logps/rejected": -6.693098068237305, "loss": 0.5414, "nll_loss": 0.5184000730514526, "rewards/accuracies": 1.0, "rewards/chosen": -0.2679920196533203, "rewards/margins": 0.401317834854126, "rewards/rejected": -0.6693098545074463, "step": 4391 }, { "epoch": 12.02464065708419, "grad_norm": 6.284039497375488, "learning_rate": 3.983561643835616e-07, "log_odds_chosen": 1.8333029747009277, "log_odds_ratio": -0.3788968026638031, "logits/chosen": 1.0064587593078613, "logits/rejected": 0.9676295518875122, "logps/chosen": -2.2830355167388916, "logps/rejected": -3.9502124786376953, "loss": 0.6126, "nll_loss": 0.5746978521347046, "rewards/accuracies": 0.875, "rewards/chosen": -0.22830353677272797, "rewards/margins": 0.16671770811080933, "rewards/rejected": -0.3950212597846985, "step": 4392 }, { "epoch": 12.02737850787132, "grad_norm": 9.821270942687988, "learning_rate": 3.9821917808219175e-07, "log_odds_chosen": 2.817538261413574, "log_odds_ratio": -0.33390387892723083, "logits/chosen": 1.120833158493042, "logits/rejected": 1.0592000484466553, "logps/chosen": -2.5544371604919434, "logps/rejected": -5.193876266479492, "loss": 0.6033, "nll_loss": 0.5699310302734375, "rewards/accuracies": 0.875, "rewards/chosen": -0.2554437518119812, "rewards/margins": 0.2639438509941101, "rewards/rejected": -0.5193876028060913, "step": 4393 }, { "epoch": 12.030116358658454, "grad_norm": 3.9385640621185303, "learning_rate": 3.980821917808219e-07, "log_odds_chosen": 1.90043044090271, "log_odds_ratio": -0.3156248927116394, "logits/chosen": 0.7664502263069153, "logits/rejected": 0.760536253452301, "logps/chosen": -2.325563669204712, "logps/rejected": -4.131030082702637, "loss": 0.6191, "nll_loss": 0.5875575542449951, "rewards/accuracies": 1.0, "rewards/chosen": -0.23255634307861328, "rewards/margins": 0.18054664134979248, "rewards/rejected": -0.41310298442840576, "step": 4394 }, { "epoch": 12.032854209445585, "grad_norm": 4.668301582336426, "learning_rate": 3.9794520547945205e-07, "log_odds_chosen": 2.7561373710632324, "log_odds_ratio": -0.16873875260353088, "logits/chosen": 1.1458494663238525, "logits/rejected": 1.1602180004119873, "logps/chosen": -1.904771327972412, "logps/rejected": -4.428059101104736, "loss": 0.5144, "nll_loss": 0.49753516912460327, "rewards/accuracies": 0.875, "rewards/chosen": -0.1904771327972412, "rewards/margins": 0.2523287832736969, "rewards/rejected": -0.4428059458732605, "step": 4395 }, { "epoch": 12.035592060232718, "grad_norm": 7.3414225578308105, "learning_rate": 3.9780821917808215e-07, "log_odds_chosen": 1.8710280656814575, "log_odds_ratio": -0.6316978931427002, "logits/chosen": 0.802282452583313, "logits/rejected": 0.7281776666641235, "logps/chosen": -2.6457877159118652, "logps/rejected": -4.329959869384766, "loss": 0.6655, "nll_loss": 0.6023143529891968, "rewards/accuracies": 0.875, "rewards/chosen": -0.26457878947257996, "rewards/margins": 0.16841718554496765, "rewards/rejected": -0.4329959452152252, "step": 4396 }, { "epoch": 12.038329911019849, "grad_norm": 11.11647891998291, "learning_rate": 3.9767123287671236e-07, "log_odds_chosen": 1.7513675689697266, "log_odds_ratio": -0.7370569109916687, "logits/chosen": 1.1615327596664429, "logits/rejected": 1.1942102909088135, "logps/chosen": -2.854809284210205, "logps/rejected": -4.448791027069092, "loss": 0.6501, "nll_loss": 0.5764286518096924, "rewards/accuracies": 0.875, "rewards/chosen": -0.28548091650009155, "rewards/margins": 0.1593981832265854, "rewards/rejected": -0.44487905502319336, "step": 4397 }, { "epoch": 12.041067761806982, "grad_norm": 4.245436191558838, "learning_rate": 3.9753424657534245e-07, "log_odds_chosen": 2.251725196838379, "log_odds_ratio": -0.19890566170215607, "logits/chosen": 0.838258683681488, "logits/rejected": 0.750914454460144, "logps/chosen": -2.521819591522217, "logps/rejected": -4.6440606117248535, "loss": 0.7626, "nll_loss": 0.7427355647087097, "rewards/accuracies": 0.875, "rewards/chosen": -0.2521819770336151, "rewards/margins": 0.2122241109609604, "rewards/rejected": -0.4644060730934143, "step": 4398 }, { "epoch": 12.043805612594113, "grad_norm": 3.6636807918548584, "learning_rate": 3.9739726027397255e-07, "log_odds_chosen": 1.1264715194702148, "log_odds_ratio": -0.3258265256881714, "logits/chosen": 1.0102108716964722, "logits/rejected": 0.9453212022781372, "logps/chosen": -1.5715771913528442, "logps/rejected": -2.5530920028686523, "loss": 0.4646, "nll_loss": 0.432006299495697, "rewards/accuracies": 1.0, "rewards/chosen": -0.15715771913528442, "rewards/margins": 0.09815149009227753, "rewards/rejected": -0.25530922412872314, "step": 4399 }, { "epoch": 12.046543463381246, "grad_norm": 3.932187080383301, "learning_rate": 3.972602739726027e-07, "log_odds_chosen": 3.1209776401519775, "log_odds_ratio": -0.09792108833789825, "logits/chosen": 1.1493041515350342, "logits/rejected": 1.1557214260101318, "logps/chosen": -1.845296859741211, "logps/rejected": -4.787027359008789, "loss": 0.4916, "nll_loss": 0.48182836174964905, "rewards/accuracies": 1.0, "rewards/chosen": -0.18452969193458557, "rewards/margins": 0.29417306184768677, "rewards/rejected": -0.4787027835845947, "step": 4400 }, { "epoch": 12.049281314168377, "grad_norm": 4.909632205963135, "learning_rate": 3.9712328767123286e-07, "log_odds_chosen": 3.0311279296875, "log_odds_ratio": -0.18170762062072754, "logits/chosen": 0.9536296129226685, "logits/rejected": 0.9440069198608398, "logps/chosen": -2.225385904312134, "logps/rejected": -5.099534034729004, "loss": 0.6605, "nll_loss": 0.6423044204711914, "rewards/accuracies": 1.0, "rewards/chosen": -0.22253860533237457, "rewards/margins": 0.2874147891998291, "rewards/rejected": -0.5099533796310425, "step": 4401 }, { "epoch": 12.05201916495551, "grad_norm": 3.904118776321411, "learning_rate": 3.96986301369863e-07, "log_odds_chosen": 3.9235823154449463, "log_odds_ratio": -0.16292643547058105, "logits/chosen": 1.1742757558822632, "logits/rejected": 1.1895291805267334, "logps/chosen": -1.7254348993301392, "logps/rejected": -5.423590660095215, "loss": 0.5864, "nll_loss": 0.5700828433036804, "rewards/accuracies": 1.0, "rewards/chosen": -0.1725434958934784, "rewards/margins": 0.3698155879974365, "rewards/rejected": -0.5423590540885925, "step": 4402 }, { "epoch": 12.054757015742641, "grad_norm": 8.327229499816895, "learning_rate": 3.968493150684931e-07, "log_odds_chosen": 1.7321858406066895, "log_odds_ratio": -0.43736058473587036, "logits/chosen": 0.8380829095840454, "logits/rejected": 0.788638710975647, "logps/chosen": -1.9563822746276855, "logps/rejected": -3.5230417251586914, "loss": 0.5368, "nll_loss": 0.4931105971336365, "rewards/accuracies": 0.875, "rewards/chosen": -0.19563822448253632, "rewards/margins": 0.15666593611240387, "rewards/rejected": -0.3523041605949402, "step": 4403 }, { "epoch": 12.057494866529774, "grad_norm": 7.314259052276611, "learning_rate": 3.967123287671233e-07, "log_odds_chosen": 2.667158842086792, "log_odds_ratio": -0.4624769389629364, "logits/chosen": 1.099541425704956, "logits/rejected": 1.0783257484436035, "logps/chosen": -2.1686370372772217, "logps/rejected": -4.707236289978027, "loss": 0.6216, "nll_loss": 0.5753276348114014, "rewards/accuracies": 0.875, "rewards/chosen": -0.2168637067079544, "rewards/margins": 0.25385990738868713, "rewards/rejected": -0.47072362899780273, "step": 4404 }, { "epoch": 12.060232717316905, "grad_norm": 4.4935431480407715, "learning_rate": 3.965753424657534e-07, "log_odds_chosen": 2.578545570373535, "log_odds_ratio": -0.27914562821388245, "logits/chosen": 0.8442574143409729, "logits/rejected": 0.8616423606872559, "logps/chosen": -2.219802141189575, "logps/rejected": -4.659783363342285, "loss": 0.6572, "nll_loss": 0.6293084621429443, "rewards/accuracies": 0.875, "rewards/chosen": -0.22198021411895752, "rewards/margins": 0.24399811029434204, "rewards/rejected": -0.4659782946109772, "step": 4405 }, { "epoch": 12.062970568104038, "grad_norm": 3.972308874130249, "learning_rate": 3.964383561643835e-07, "log_odds_chosen": 2.509584665298462, "log_odds_ratio": -0.18218103051185608, "logits/chosen": 1.0004894733428955, "logits/rejected": 0.994231104850769, "logps/chosen": -2.738985061645508, "logps/rejected": -5.145441055297852, "loss": 0.7249, "nll_loss": 0.706730842590332, "rewards/accuracies": 1.0, "rewards/chosen": -0.27389851212501526, "rewards/margins": 0.2406456023454666, "rewards/rejected": -0.5145441293716431, "step": 4406 }, { "epoch": 12.06570841889117, "grad_norm": 5.778366565704346, "learning_rate": 3.9630136986301366e-07, "log_odds_chosen": 2.402644157409668, "log_odds_ratio": -0.3638298809528351, "logits/chosen": 0.9938795566558838, "logits/rejected": 0.9520077705383301, "logps/chosen": -2.01357102394104, "logps/rejected": -4.159992218017578, "loss": 0.7066, "nll_loss": 0.6702597141265869, "rewards/accuracies": 0.875, "rewards/chosen": -0.20135711133480072, "rewards/margins": 0.21464213728904724, "rewards/rejected": -0.41599923372268677, "step": 4407 }, { "epoch": 12.068446269678303, "grad_norm": 14.009848594665527, "learning_rate": 3.961643835616438e-07, "log_odds_chosen": 2.1525251865386963, "log_odds_ratio": -0.3853383958339691, "logits/chosen": 1.4019625186920166, "logits/rejected": 1.3879421949386597, "logps/chosen": -2.908949613571167, "logps/rejected": -4.985902786254883, "loss": 0.7164, "nll_loss": 0.6778607368469238, "rewards/accuracies": 0.75, "rewards/chosen": -0.2908949553966522, "rewards/margins": 0.2076953947544098, "rewards/rejected": -0.498590350151062, "step": 4408 }, { "epoch": 12.071184120465434, "grad_norm": 4.10283899307251, "learning_rate": 3.9602739726027397e-07, "log_odds_chosen": 0.835912823677063, "log_odds_ratio": -0.5071761608123779, "logits/chosen": 1.046364188194275, "logits/rejected": 0.9731727838516235, "logps/chosen": -1.69193434715271, "logps/rejected": -2.2997305393218994, "loss": 0.5058, "nll_loss": 0.4550584554672241, "rewards/accuracies": 0.875, "rewards/chosen": -0.16919343173503876, "rewards/margins": 0.0607796236872673, "rewards/rejected": -0.22997304797172546, "step": 4409 }, { "epoch": 12.073921971252567, "grad_norm": 6.552910327911377, "learning_rate": 3.9589041095890407e-07, "log_odds_chosen": 3.598278760910034, "log_odds_ratio": -0.11563538014888763, "logits/chosen": 0.8119351863861084, "logits/rejected": 0.7920815348625183, "logps/chosen": -1.9938523769378662, "logps/rejected": -5.3974432945251465, "loss": 0.7235, "nll_loss": 0.7119005918502808, "rewards/accuracies": 1.0, "rewards/chosen": -0.19938522577285767, "rewards/margins": 0.340359091758728, "rewards/rejected": -0.5397443175315857, "step": 4410 }, { "epoch": 12.0766598220397, "grad_norm": 7.394410610198975, "learning_rate": 3.9575342465753427e-07, "log_odds_chosen": 2.1195645332336426, "log_odds_ratio": -0.6995282769203186, "logits/chosen": 1.2918317317962646, "logits/rejected": 1.3260014057159424, "logps/chosen": -2.6141223907470703, "logps/rejected": -4.657783031463623, "loss": 0.7244, "nll_loss": 0.6544193029403687, "rewards/accuracies": 0.625, "rewards/chosen": -0.26141226291656494, "rewards/margins": 0.2043660581111908, "rewards/rejected": -0.46577829122543335, "step": 4411 }, { "epoch": 12.07939767282683, "grad_norm": 3.308616876602173, "learning_rate": 3.9561643835616437e-07, "log_odds_chosen": 2.2224767208099365, "log_odds_ratio": -0.1826733946800232, "logits/chosen": 0.9510211944580078, "logits/rejected": 0.961531400680542, "logps/chosen": -1.3636682033538818, "logps/rejected": -3.3152554035186768, "loss": 0.4974, "nll_loss": 0.4791799783706665, "rewards/accuracies": 1.0, "rewards/chosen": -0.1363668143749237, "rewards/margins": 0.19515873491764069, "rewards/rejected": -0.3315255641937256, "step": 4412 }, { "epoch": 12.082135523613964, "grad_norm": 4.053825378417969, "learning_rate": 3.9547945205479447e-07, "log_odds_chosen": 1.310014247894287, "log_odds_ratio": -0.3530022203922272, "logits/chosen": 1.256653070449829, "logits/rejected": 1.1832295656204224, "logps/chosen": -1.2525527477264404, "logps/rejected": -2.349064350128174, "loss": 0.4508, "nll_loss": 0.41546300053596497, "rewards/accuracies": 0.875, "rewards/chosen": -0.125255286693573, "rewards/margins": 0.10965116322040558, "rewards/rejected": -0.23490643501281738, "step": 4413 }, { "epoch": 12.084873374401095, "grad_norm": 4.101044654846191, "learning_rate": 3.953424657534246e-07, "log_odds_chosen": 1.6484655141830444, "log_odds_ratio": -0.3031887412071228, "logits/chosen": 1.1874568462371826, "logits/rejected": 1.2436585426330566, "logps/chosen": -1.9491015672683716, "logps/rejected": -3.3740649223327637, "loss": 0.5198, "nll_loss": 0.489432156085968, "rewards/accuracies": 0.875, "rewards/chosen": -0.1949101686477661, "rewards/margins": 0.14249634742736816, "rewards/rejected": -0.3374064862728119, "step": 4414 }, { "epoch": 12.087611225188228, "grad_norm": 5.088544845581055, "learning_rate": 3.952054794520548e-07, "log_odds_chosen": 0.867965817451477, "log_odds_ratio": -0.4927428662776947, "logits/chosen": 0.963595986366272, "logits/rejected": 0.9485827088356018, "logps/chosen": -1.939307451248169, "logps/rejected": -2.720202684402466, "loss": 0.5807, "nll_loss": 0.5313948392868042, "rewards/accuracies": 0.5, "rewards/chosen": -0.1939307451248169, "rewards/margins": 0.07808953523635864, "rewards/rejected": -0.27202028036117554, "step": 4415 }, { "epoch": 12.09034907597536, "grad_norm": 3.4356725215911865, "learning_rate": 3.9506849315068493e-07, "log_odds_chosen": 1.9488450288772583, "log_odds_ratio": -0.23436236381530762, "logits/chosen": 0.8207653760910034, "logits/rejected": 0.8227482438087463, "logps/chosen": -1.667393445968628, "logps/rejected": -3.4109253883361816, "loss": 0.5422, "nll_loss": 0.5187171697616577, "rewards/accuracies": 0.875, "rewards/chosen": -0.166739359498024, "rewards/margins": 0.1743531972169876, "rewards/rejected": -0.3410925269126892, "step": 4416 }, { "epoch": 12.093086926762492, "grad_norm": 3.6087443828582764, "learning_rate": 3.94931506849315e-07, "log_odds_chosen": 2.2239739894866943, "log_odds_ratio": -0.21156153082847595, "logits/chosen": 1.0037024021148682, "logits/rejected": 0.9112840890884399, "logps/chosen": -1.3970471620559692, "logps/rejected": -3.403414011001587, "loss": 0.595, "nll_loss": 0.573865532875061, "rewards/accuracies": 1.0, "rewards/chosen": -0.13970471918582916, "rewards/margins": 0.20063669979572296, "rewards/rejected": -0.3403414487838745, "step": 4417 }, { "epoch": 12.095824777549623, "grad_norm": 3.7028419971466064, "learning_rate": 3.9479452054794523e-07, "log_odds_chosen": 2.924867630004883, "log_odds_ratio": -0.23230528831481934, "logits/chosen": 1.0884075164794922, "logits/rejected": 1.0928635597229004, "logps/chosen": -1.934834361076355, "logps/rejected": -4.708237648010254, "loss": 0.533, "nll_loss": 0.5097373127937317, "rewards/accuracies": 1.0, "rewards/chosen": -0.19348345696926117, "rewards/margins": 0.2773403525352478, "rewards/rejected": -0.4708237946033478, "step": 4418 }, { "epoch": 12.098562628336756, "grad_norm": 7.2633056640625, "learning_rate": 3.9465753424657533e-07, "log_odds_chosen": 3.0230698585510254, "log_odds_ratio": -0.5852066874504089, "logits/chosen": 0.9023868441581726, "logits/rejected": 0.914056122303009, "logps/chosen": -2.2644541263580322, "logps/rejected": -5.049203395843506, "loss": 0.6582, "nll_loss": 0.5996482372283936, "rewards/accuracies": 0.875, "rewards/chosen": -0.22644540667533875, "rewards/margins": 0.27847492694854736, "rewards/rejected": -0.5049203038215637, "step": 4419 }, { "epoch": 12.101300479123887, "grad_norm": 3.65079665184021, "learning_rate": 3.9452054794520543e-07, "log_odds_chosen": 3.214787721633911, "log_odds_ratio": -0.17700816690921783, "logits/chosen": 0.8420064449310303, "logits/rejected": 0.8130267858505249, "logps/chosen": -1.781454086303711, "logps/rejected": -4.802189826965332, "loss": 0.6118, "nll_loss": 0.5941057205200195, "rewards/accuracies": 1.0, "rewards/chosen": -0.1781454086303711, "rewards/margins": 0.30207359790802, "rewards/rejected": -0.4802190065383911, "step": 4420 }, { "epoch": 12.10403832991102, "grad_norm": 3.6230721473693848, "learning_rate": 3.943835616438356e-07, "log_odds_chosen": 2.249972343444824, "log_odds_ratio": -0.18806132674217224, "logits/chosen": 0.9975607991218567, "logits/rejected": 0.9875092506408691, "logps/chosen": -1.630913257598877, "logps/rejected": -3.6976318359375, "loss": 0.5491, "nll_loss": 0.5303394794464111, "rewards/accuracies": 1.0, "rewards/chosen": -0.16309133172035217, "rewards/margins": 0.20667187869548798, "rewards/rejected": -0.36976319551467896, "step": 4421 }, { "epoch": 12.106776180698152, "grad_norm": 3.6312437057495117, "learning_rate": 3.9424657534246573e-07, "log_odds_chosen": 2.8550844192504883, "log_odds_ratio": -0.16081897914409637, "logits/chosen": 0.8729782104492188, "logits/rejected": 0.8023471236228943, "logps/chosen": -1.9230290651321411, "logps/rejected": -4.611961841583252, "loss": 0.4881, "nll_loss": 0.4719965159893036, "rewards/accuracies": 1.0, "rewards/chosen": -0.1923029124736786, "rewards/margins": 0.2688932418823242, "rewards/rejected": -0.4611961841583252, "step": 4422 }, { "epoch": 12.109514031485284, "grad_norm": 4.728236198425293, "learning_rate": 3.941095890410959e-07, "log_odds_chosen": 0.9348992705345154, "log_odds_ratio": -0.506939172744751, "logits/chosen": 1.3116590976715088, "logits/rejected": 1.2952574491500854, "logps/chosen": -2.4931814670562744, "logps/rejected": -3.2221271991729736, "loss": 0.5132, "nll_loss": 0.46248871088027954, "rewards/accuracies": 0.75, "rewards/chosen": -0.24931813776493073, "rewards/margins": 0.07289458811283112, "rewards/rejected": -0.32221272587776184, "step": 4423 }, { "epoch": 12.112251882272416, "grad_norm": 3.951195478439331, "learning_rate": 3.93972602739726e-07, "log_odds_chosen": 1.489619255065918, "log_odds_ratio": -0.22978658974170685, "logits/chosen": 0.945435106754303, "logits/rejected": 0.911217451095581, "logps/chosen": -1.8492050170898438, "logps/rejected": -3.158635139465332, "loss": 0.5084, "nll_loss": 0.4854329228401184, "rewards/accuracies": 1.0, "rewards/chosen": -0.1849205046892166, "rewards/margins": 0.13094301521778107, "rewards/rejected": -0.3158635199069977, "step": 4424 }, { "epoch": 12.114989733059549, "grad_norm": 3.1391241550445557, "learning_rate": 3.938356164383562e-07, "log_odds_chosen": 3.0677523612976074, "log_odds_ratio": -0.12958809733390808, "logits/chosen": 1.1020293235778809, "logits/rejected": 1.0554403066635132, "logps/chosen": -2.007767915725708, "logps/rejected": -4.880038261413574, "loss": 0.5076, "nll_loss": 0.49462807178497314, "rewards/accuracies": 1.0, "rewards/chosen": -0.20077678561210632, "rewards/margins": 0.287227064371109, "rewards/rejected": -0.48800384998321533, "step": 4425 }, { "epoch": 12.11772758384668, "grad_norm": 4.38693380355835, "learning_rate": 3.936986301369863e-07, "log_odds_chosen": 2.0978662967681885, "log_odds_ratio": -0.23802338540554047, "logits/chosen": 0.887493371963501, "logits/rejected": 0.9563452005386353, "logps/chosen": -1.8437163829803467, "logps/rejected": -3.7591350078582764, "loss": 0.6527, "nll_loss": 0.6288881897926331, "rewards/accuracies": 1.0, "rewards/chosen": -0.18437165021896362, "rewards/margins": 0.1915418654680252, "rewards/rejected": -0.37591350078582764, "step": 4426 }, { "epoch": 12.120465434633813, "grad_norm": 3.4726450443267822, "learning_rate": 3.935616438356164e-07, "log_odds_chosen": 1.1780788898468018, "log_odds_ratio": -0.3236781656742096, "logits/chosen": 1.3299797773361206, "logits/rejected": 1.2379014492034912, "logps/chosen": -1.5868544578552246, "logps/rejected": -2.6207222938537598, "loss": 0.5103, "nll_loss": 0.47794342041015625, "rewards/accuracies": 1.0, "rewards/chosen": -0.15868546068668365, "rewards/margins": 0.1033867821097374, "rewards/rejected": -0.26207223534584045, "step": 4427 }, { "epoch": 12.123203285420944, "grad_norm": 6.25553035736084, "learning_rate": 3.934246575342466e-07, "log_odds_chosen": 1.9793466329574585, "log_odds_ratio": -0.24791552126407623, "logits/chosen": 0.8323116302490234, "logits/rejected": 0.7853115797042847, "logps/chosen": -1.578735589981079, "logps/rejected": -3.3500328063964844, "loss": 0.519, "nll_loss": 0.49425452947616577, "rewards/accuracies": 1.0, "rewards/chosen": -0.15787355601787567, "rewards/margins": 0.17712971568107605, "rewards/rejected": -0.3350032567977905, "step": 4428 }, { "epoch": 12.125941136208077, "grad_norm": 3.865250825881958, "learning_rate": 3.932876712328767e-07, "log_odds_chosen": 2.6074697971343994, "log_odds_ratio": -0.17779770493507385, "logits/chosen": 1.0789347887039185, "logits/rejected": 1.0597612857818604, "logps/chosen": -2.021432399749756, "logps/rejected": -4.46775484085083, "loss": 0.5623, "nll_loss": 0.5445525646209717, "rewards/accuracies": 1.0, "rewards/chosen": -0.20214325189590454, "rewards/margins": 0.24463222920894623, "rewards/rejected": -0.4467754662036896, "step": 4429 }, { "epoch": 12.128678986995208, "grad_norm": 4.194814205169678, "learning_rate": 3.9315068493150684e-07, "log_odds_chosen": 2.131551742553711, "log_odds_ratio": -0.22425052523612976, "logits/chosen": 0.9556527137756348, "logits/rejected": 0.8736652135848999, "logps/chosen": -1.8279775381088257, "logps/rejected": -3.7894177436828613, "loss": 0.506, "nll_loss": 0.4835343062877655, "rewards/accuracies": 0.875, "rewards/chosen": -0.18279775977134705, "rewards/margins": 0.1961440145969391, "rewards/rejected": -0.37894177436828613, "step": 4430 }, { "epoch": 12.131416837782341, "grad_norm": 4.789638996124268, "learning_rate": 3.9301369863013694e-07, "log_odds_chosen": 2.8601365089416504, "log_odds_ratio": -0.45801842212677, "logits/chosen": 1.1093829870224, "logits/rejected": 1.1025140285491943, "logps/chosen": -2.022170066833496, "logps/rejected": -4.725608825683594, "loss": 0.6813, "nll_loss": 0.6354587078094482, "rewards/accuracies": 0.75, "rewards/chosen": -0.2022170126438141, "rewards/margins": 0.2703438699245453, "rewards/rejected": -0.4725608825683594, "step": 4431 }, { "epoch": 12.134154688569472, "grad_norm": 3.2931368350982666, "learning_rate": 3.9287671232876715e-07, "log_odds_chosen": 3.0523738861083984, "log_odds_ratio": -0.16629324853420258, "logits/chosen": 1.2292790412902832, "logits/rejected": 1.2511743307113647, "logps/chosen": -1.836852788925171, "logps/rejected": -4.678672790527344, "loss": 0.5751, "nll_loss": 0.5584921836853027, "rewards/accuracies": 1.0, "rewards/chosen": -0.1836852729320526, "rewards/margins": 0.28418201208114624, "rewards/rejected": -0.46786725521087646, "step": 4432 }, { "epoch": 12.136892539356605, "grad_norm": 3.9754252433776855, "learning_rate": 3.9273972602739725e-07, "log_odds_chosen": 1.9827837944030762, "log_odds_ratio": -0.26149219274520874, "logits/chosen": 1.0891788005828857, "logits/rejected": 1.1392202377319336, "logps/chosen": -2.1605734825134277, "logps/rejected": -4.010022163391113, "loss": 0.6063, "nll_loss": 0.580173134803772, "rewards/accuracies": 1.0, "rewards/chosen": -0.21605737507343292, "rewards/margins": 0.18494485318660736, "rewards/rejected": -0.4010022282600403, "step": 4433 }, { "epoch": 12.139630390143736, "grad_norm": 4.109029769897461, "learning_rate": 3.9260273972602735e-07, "log_odds_chosen": 2.024951457977295, "log_odds_ratio": -0.3671165704727173, "logits/chosen": 1.0601251125335693, "logits/rejected": 1.0332409143447876, "logps/chosen": -1.5311133861541748, "logps/rejected": -3.3967931270599365, "loss": 0.5859, "nll_loss": 0.549167275428772, "rewards/accuracies": 0.625, "rewards/chosen": -0.15311133861541748, "rewards/margins": 0.1865679919719696, "rewards/rejected": -0.3396793305873871, "step": 4434 }, { "epoch": 12.14236824093087, "grad_norm": 3.402662515640259, "learning_rate": 3.9246575342465755e-07, "log_odds_chosen": 1.706062912940979, "log_odds_ratio": -0.266677588224411, "logits/chosen": 0.8521447777748108, "logits/rejected": 0.813611626625061, "logps/chosen": -1.5895942449569702, "logps/rejected": -3.09552001953125, "loss": 0.6376, "nll_loss": 0.6109376549720764, "rewards/accuracies": 0.875, "rewards/chosen": -0.15895941853523254, "rewards/margins": 0.1505926102399826, "rewards/rejected": -0.30955201387405396, "step": 4435 }, { "epoch": 12.145106091718002, "grad_norm": 3.5833916664123535, "learning_rate": 3.9232876712328765e-07, "log_odds_chosen": 2.4170398712158203, "log_odds_ratio": -0.20916897058486938, "logits/chosen": 0.7563799619674683, "logits/rejected": 0.7271419167518616, "logps/chosen": -2.1220624446868896, "logps/rejected": -4.279007911682129, "loss": 0.7527, "nll_loss": 0.7317862510681152, "rewards/accuracies": 0.875, "rewards/chosen": -0.21220624446868896, "rewards/margins": 0.2156945765018463, "rewards/rejected": -0.42790085077285767, "step": 4436 }, { "epoch": 12.147843942505133, "grad_norm": 4.0922627449035645, "learning_rate": 3.921917808219178e-07, "log_odds_chosen": 2.821837902069092, "log_odds_ratio": -0.26948410272598267, "logits/chosen": 1.0072590112686157, "logits/rejected": 1.01533043384552, "logps/chosen": -1.903524398803711, "logps/rejected": -4.608455657958984, "loss": 0.5919, "nll_loss": 0.5649439096450806, "rewards/accuracies": 0.875, "rewards/chosen": -0.1903524547815323, "rewards/margins": 0.27049314975738525, "rewards/rejected": -0.46084558963775635, "step": 4437 }, { "epoch": 12.150581793292266, "grad_norm": 3.792583703994751, "learning_rate": 3.920547945205479e-07, "log_odds_chosen": 2.177396059036255, "log_odds_ratio": -0.3706718385219574, "logits/chosen": 1.1357743740081787, "logits/rejected": 1.1678025722503662, "logps/chosen": -2.726388931274414, "logps/rejected": -4.6979241371154785, "loss": 0.5535, "nll_loss": 0.5164121389389038, "rewards/accuracies": 0.875, "rewards/chosen": -0.2726389169692993, "rewards/margins": 0.19715353846549988, "rewards/rejected": -0.4697924256324768, "step": 4438 }, { "epoch": 12.153319644079398, "grad_norm": 3.687986135482788, "learning_rate": 3.919178082191781e-07, "log_odds_chosen": 2.4957828521728516, "log_odds_ratio": -0.19092664122581482, "logits/chosen": 1.1168851852416992, "logits/rejected": 1.1142349243164062, "logps/chosen": -2.050820827484131, "logps/rejected": -4.40679931640625, "loss": 0.6511, "nll_loss": 0.6319810152053833, "rewards/accuracies": 1.0, "rewards/chosen": -0.20508208870887756, "rewards/margins": 0.2355978786945343, "rewards/rejected": -0.44067996740341187, "step": 4439 }, { "epoch": 12.15605749486653, "grad_norm": 3.304344654083252, "learning_rate": 3.917808219178082e-07, "log_odds_chosen": 2.6763908863067627, "log_odds_ratio": -0.24794375896453857, "logits/chosen": 0.9299753904342651, "logits/rejected": 0.8602272272109985, "logps/chosen": -1.5347630977630615, "logps/rejected": -3.9804022312164307, "loss": 0.5776, "nll_loss": 0.5527747273445129, "rewards/accuracies": 1.0, "rewards/chosen": -0.1534763127565384, "rewards/margins": 0.24456393718719482, "rewards/rejected": -0.398040235042572, "step": 4440 }, { "epoch": 12.158795345653662, "grad_norm": 3.824934720993042, "learning_rate": 3.916438356164383e-07, "log_odds_chosen": 3.455411434173584, "log_odds_ratio": -0.1691288948059082, "logits/chosen": 1.1856696605682373, "logits/rejected": 1.1720861196517944, "logps/chosen": -1.9982051849365234, "logps/rejected": -5.249026775360107, "loss": 0.5856, "nll_loss": 0.5686765313148499, "rewards/accuracies": 0.875, "rewards/chosen": -0.19982051849365234, "rewards/margins": 0.3250821828842163, "rewards/rejected": -0.5249027013778687, "step": 4441 }, { "epoch": 12.161533196440795, "grad_norm": 3.2201945781707764, "learning_rate": 3.915068493150685e-07, "log_odds_chosen": 2.2642476558685303, "log_odds_ratio": -0.2731494605541229, "logits/chosen": 1.1787351369857788, "logits/rejected": 1.1824153661727905, "logps/chosen": -1.6572253704071045, "logps/rejected": -3.7138254642486572, "loss": 0.4774, "nll_loss": 0.450131356716156, "rewards/accuracies": 0.875, "rewards/chosen": -0.1657225489616394, "rewards/margins": 0.20566001534461975, "rewards/rejected": -0.37138253450393677, "step": 4442 }, { "epoch": 12.164271047227926, "grad_norm": 3.383653402328491, "learning_rate": 3.913698630136986e-07, "log_odds_chosen": 3.228264808654785, "log_odds_ratio": -0.1455136388540268, "logits/chosen": 0.7287421822547913, "logits/rejected": 0.6430185437202454, "logps/chosen": -1.6692551374435425, "logps/rejected": -4.697566509246826, "loss": 0.5674, "nll_loss": 0.5528755784034729, "rewards/accuracies": 1.0, "rewards/chosen": -0.16692551970481873, "rewards/margins": 0.30283114314079285, "rewards/rejected": -0.46975669264793396, "step": 4443 }, { "epoch": 12.167008898015059, "grad_norm": 3.053812265396118, "learning_rate": 3.9123287671232876e-07, "log_odds_chosen": 3.594119071960449, "log_odds_ratio": -0.05701739341020584, "logits/chosen": 1.0312747955322266, "logits/rejected": 1.0737085342407227, "logps/chosen": -1.3948898315429688, "logps/rejected": -4.544808387756348, "loss": 0.4762, "nll_loss": 0.4704790413379669, "rewards/accuracies": 1.0, "rewards/chosen": -0.13948898017406464, "rewards/margins": 0.31499183177948, "rewards/rejected": -0.4544808268547058, "step": 4444 }, { "epoch": 12.16974674880219, "grad_norm": 4.720108509063721, "learning_rate": 3.9109589041095886e-07, "log_odds_chosen": 1.6165629625320435, "log_odds_ratio": -0.6055055856704712, "logits/chosen": 0.9198268055915833, "logits/rejected": 0.9151341915130615, "logps/chosen": -2.442591667175293, "logps/rejected": -3.938530921936035, "loss": 0.767, "nll_loss": 0.7064225673675537, "rewards/accuracies": 0.875, "rewards/chosen": -0.24425916373729706, "rewards/margins": 0.14959393441677094, "rewards/rejected": -0.3938531279563904, "step": 4445 }, { "epoch": 12.172484599589323, "grad_norm": 3.683255910873413, "learning_rate": 3.90958904109589e-07, "log_odds_chosen": 2.9784228801727295, "log_odds_ratio": -0.1305231750011444, "logits/chosen": 1.2570356130599976, "logits/rejected": 1.254486322402954, "logps/chosen": -1.730942726135254, "logps/rejected": -4.428712368011475, "loss": 0.5306, "nll_loss": 0.5175966024398804, "rewards/accuracies": 1.0, "rewards/chosen": -0.1730942726135254, "rewards/margins": 0.26977699995040894, "rewards/rejected": -0.4428712725639343, "step": 4446 }, { "epoch": 12.175222450376454, "grad_norm": 4.144827365875244, "learning_rate": 3.9082191780821917e-07, "log_odds_chosen": 1.8991036415100098, "log_odds_ratio": -0.22448204457759857, "logits/chosen": 0.7397114634513855, "logits/rejected": 0.695753812789917, "logps/chosen": -1.6856813430786133, "logps/rejected": -3.406437873840332, "loss": 0.6503, "nll_loss": 0.627830982208252, "rewards/accuracies": 1.0, "rewards/chosen": -0.16856813430786133, "rewards/margins": 0.17207567393779755, "rewards/rejected": -0.34064382314682007, "step": 4447 }, { "epoch": 12.177960301163587, "grad_norm": 3.370694875717163, "learning_rate": 3.9068493150684926e-07, "log_odds_chosen": 2.7412030696868896, "log_odds_ratio": -0.18202033638954163, "logits/chosen": 0.9684563875198364, "logits/rejected": 0.9686242341995239, "logps/chosen": -1.4699902534484863, "logps/rejected": -3.949463367462158, "loss": 0.4823, "nll_loss": 0.46404969692230225, "rewards/accuracies": 1.0, "rewards/chosen": -0.1469990462064743, "rewards/margins": 0.2479473054409027, "rewards/rejected": -0.3949463367462158, "step": 4448 }, { "epoch": 12.180698151950718, "grad_norm": 3.5096023082733154, "learning_rate": 3.9054794520547947e-07, "log_odds_chosen": 1.793293833732605, "log_odds_ratio": -0.32648980617523193, "logits/chosen": 0.836986780166626, "logits/rejected": 0.8849824666976929, "logps/chosen": -2.1048107147216797, "logps/rejected": -3.748631000518799, "loss": 0.6007, "nll_loss": 0.56801438331604, "rewards/accuracies": 0.875, "rewards/chosen": -0.21048106253147125, "rewards/margins": 0.16438207030296326, "rewards/rejected": -0.3748631179332733, "step": 4449 }, { "epoch": 12.183436002737851, "grad_norm": 3.9376237392425537, "learning_rate": 3.9041095890410957e-07, "log_odds_chosen": 3.6893222332000732, "log_odds_ratio": -0.15978409349918365, "logits/chosen": 0.9350814819335938, "logits/rejected": 0.978635847568512, "logps/chosen": -1.5422818660736084, "logps/rejected": -4.964174747467041, "loss": 0.6641, "nll_loss": 0.6481319665908813, "rewards/accuracies": 1.0, "rewards/chosen": -0.15422819554805756, "rewards/margins": 0.3421892523765564, "rewards/rejected": -0.49641746282577515, "step": 4450 }, { "epoch": 12.186173853524982, "grad_norm": 3.1450533866882324, "learning_rate": 3.902739726027397e-07, "log_odds_chosen": 3.328944444656372, "log_odds_ratio": -0.14318092167377472, "logits/chosen": 1.102493405342102, "logits/rejected": 1.109872579574585, "logps/chosen": -1.5162464380264282, "logps/rejected": -4.643976211547852, "loss": 0.4901, "nll_loss": 0.475792795419693, "rewards/accuracies": 1.0, "rewards/chosen": -0.1516246497631073, "rewards/margins": 0.3127729594707489, "rewards/rejected": -0.4643976092338562, "step": 4451 }, { "epoch": 12.188911704312115, "grad_norm": 3.081965446472168, "learning_rate": 3.901369863013698e-07, "log_odds_chosen": 4.73261833190918, "log_odds_ratio": -0.14985837042331696, "logits/chosen": 1.197882890701294, "logits/rejected": 1.1948708295822144, "logps/chosen": -1.628845453262329, "logps/rejected": -6.1583356857299805, "loss": 0.5899, "nll_loss": 0.5748985409736633, "rewards/accuracies": 0.875, "rewards/chosen": -0.16288454830646515, "rewards/margins": 0.45294904708862305, "rewards/rejected": -0.615833580493927, "step": 4452 }, { "epoch": 12.191649555099247, "grad_norm": 3.6935391426086426, "learning_rate": 3.8999999999999997e-07, "log_odds_chosen": 2.786407947540283, "log_odds_ratio": -0.31762391328811646, "logits/chosen": 1.1086574792861938, "logits/rejected": 1.0596976280212402, "logps/chosen": -1.2422683238983154, "logps/rejected": -3.7751049995422363, "loss": 0.4722, "nll_loss": 0.44044229388237, "rewards/accuracies": 1.0, "rewards/chosen": -0.12422682344913483, "rewards/margins": 0.25328367948532104, "rewards/rejected": -0.37751051783561707, "step": 4453 }, { "epoch": 12.19438740588638, "grad_norm": 4.0450334548950195, "learning_rate": 3.898630136986301e-07, "log_odds_chosen": 2.522357702255249, "log_odds_ratio": -0.31407591700553894, "logits/chosen": 0.9254583120346069, "logits/rejected": 0.9205654263496399, "logps/chosen": -1.741387128829956, "logps/rejected": -4.0818400382995605, "loss": 0.6069, "nll_loss": 0.5754772424697876, "rewards/accuracies": 0.875, "rewards/chosen": -0.17413872480392456, "rewards/margins": 0.23404529690742493, "rewards/rejected": -0.4081840515136719, "step": 4454 }, { "epoch": 12.19712525667351, "grad_norm": 3.6019115447998047, "learning_rate": 3.897260273972602e-07, "log_odds_chosen": 1.4591732025146484, "log_odds_ratio": -0.3382551074028015, "logits/chosen": 1.143070101737976, "logits/rejected": 1.14933443069458, "logps/chosen": -1.872349739074707, "logps/rejected": -3.226060390472412, "loss": 0.5287, "nll_loss": 0.49483853578567505, "rewards/accuracies": 0.75, "rewards/chosen": -0.18723496794700623, "rewards/margins": 0.13537105917930603, "rewards/rejected": -0.32260602712631226, "step": 4455 }, { "epoch": 12.199863107460644, "grad_norm": 3.863618850708008, "learning_rate": 3.8958904109589043e-07, "log_odds_chosen": 2.4260592460632324, "log_odds_ratio": -0.23090329766273499, "logits/chosen": 0.887091875076294, "logits/rejected": 0.8168377876281738, "logps/chosen": -1.2937074899673462, "logps/rejected": -3.4158923625946045, "loss": 0.5208, "nll_loss": 0.49773290753364563, "rewards/accuracies": 1.0, "rewards/chosen": -0.12937074899673462, "rewards/margins": 0.2122184783220291, "rewards/rejected": -0.3415892422199249, "step": 4456 }, { "epoch": 12.202600958247775, "grad_norm": 3.358980178833008, "learning_rate": 3.8945205479452053e-07, "log_odds_chosen": 2.841559886932373, "log_odds_ratio": -0.2503809630870819, "logits/chosen": 1.0343050956726074, "logits/rejected": 1.0146465301513672, "logps/chosen": -1.8187980651855469, "logps/rejected": -4.5425519943237305, "loss": 0.5648, "nll_loss": 0.5397614240646362, "rewards/accuracies": 0.875, "rewards/chosen": -0.18187981843948364, "rewards/margins": 0.2723753750324249, "rewards/rejected": -0.4542551636695862, "step": 4457 }, { "epoch": 12.205338809034908, "grad_norm": 3.676767349243164, "learning_rate": 3.893150684931507e-07, "log_odds_chosen": 1.920133352279663, "log_odds_ratio": -0.20311813056468964, "logits/chosen": 0.813896894454956, "logits/rejected": 0.7464407086372375, "logps/chosen": -1.8000175952911377, "logps/rejected": -3.5686116218566895, "loss": 0.5267, "nll_loss": 0.5063512325286865, "rewards/accuracies": 1.0, "rewards/chosen": -0.18000175058841705, "rewards/margins": 0.17685940861701965, "rewards/rejected": -0.3568611741065979, "step": 4458 }, { "epoch": 12.208076659822039, "grad_norm": 2.977552890777588, "learning_rate": 3.891780821917808e-07, "log_odds_chosen": 3.4352569580078125, "log_odds_ratio": -0.09777677804231644, "logits/chosen": 0.9649626612663269, "logits/rejected": 0.9964234828948975, "logps/chosen": -1.577179193496704, "logps/rejected": -4.756034851074219, "loss": 0.5056, "nll_loss": 0.49585941433906555, "rewards/accuracies": 1.0, "rewards/chosen": -0.15771791338920593, "rewards/margins": 0.3178855776786804, "rewards/rejected": -0.47560352087020874, "step": 4459 }, { "epoch": 12.210814510609172, "grad_norm": 5.758825302124023, "learning_rate": 3.8904109589041093e-07, "log_odds_chosen": 2.387636184692383, "log_odds_ratio": -0.48758822679519653, "logits/chosen": 0.8891984224319458, "logits/rejected": 0.8691585063934326, "logps/chosen": -2.3499984741210938, "logps/rejected": -4.639960765838623, "loss": 0.6205, "nll_loss": 0.5717252492904663, "rewards/accuracies": 0.625, "rewards/chosen": -0.2349998652935028, "rewards/margins": 0.22899623215198517, "rewards/rejected": -0.46399611234664917, "step": 4460 }, { "epoch": 12.213552361396303, "grad_norm": 4.401618480682373, "learning_rate": 3.889041095890411e-07, "log_odds_chosen": 1.6067265272140503, "log_odds_ratio": -0.24069428443908691, "logits/chosen": 0.769241452217102, "logits/rejected": 0.7468080520629883, "logps/chosen": -1.8475842475891113, "logps/rejected": -3.221745014190674, "loss": 0.5981, "nll_loss": 0.5740721225738525, "rewards/accuracies": 1.0, "rewards/chosen": -0.18475842475891113, "rewards/margins": 0.1374160647392273, "rewards/rejected": -0.3221744894981384, "step": 4461 }, { "epoch": 12.216290212183436, "grad_norm": 3.528317451477051, "learning_rate": 3.887671232876712e-07, "log_odds_chosen": 3.2998833656311035, "log_odds_ratio": -0.21267522871494293, "logits/chosen": 1.011367678642273, "logits/rejected": 0.948207676410675, "logps/chosen": -1.5714290142059326, "logps/rejected": -4.602187156677246, "loss": 0.6279, "nll_loss": 0.6065976619720459, "rewards/accuracies": 1.0, "rewards/chosen": -0.15714290738105774, "rewards/margins": 0.30307576060295105, "rewards/rejected": -0.4602186679840088, "step": 4462 }, { "epoch": 12.219028062970569, "grad_norm": 3.993924617767334, "learning_rate": 3.886301369863014e-07, "log_odds_chosen": 2.1712584495544434, "log_odds_ratio": -0.23577766120433807, "logits/chosen": 1.001190185546875, "logits/rejected": 0.9304866790771484, "logps/chosen": -1.9586085081100464, "logps/rejected": -4.005211353302002, "loss": 0.5862, "nll_loss": 0.5626046657562256, "rewards/accuracies": 1.0, "rewards/chosen": -0.1958608478307724, "rewards/margins": 0.2046603113412857, "rewards/rejected": -0.4005211591720581, "step": 4463 }, { "epoch": 12.2217659137577, "grad_norm": 3.923297643661499, "learning_rate": 3.884931506849315e-07, "log_odds_chosen": 3.24735164642334, "log_odds_ratio": -0.41061079502105713, "logits/chosen": 0.8114421963691711, "logits/rejected": 0.8035023212432861, "logps/chosen": -2.3566665649414062, "logps/rejected": -5.517341613769531, "loss": 0.6362, "nll_loss": 0.5951143503189087, "rewards/accuracies": 0.75, "rewards/chosen": -0.2356666624546051, "rewards/margins": 0.31606748700141907, "rewards/rejected": -0.5517341494560242, "step": 4464 }, { "epoch": 12.224503764544833, "grad_norm": 4.089692115783691, "learning_rate": 3.8835616438356164e-07, "log_odds_chosen": 1.858494520187378, "log_odds_ratio": -0.21463623642921448, "logits/chosen": 1.091844081878662, "logits/rejected": 1.0868556499481201, "logps/chosen": -1.7039039134979248, "logps/rejected": -3.3730201721191406, "loss": 0.5246, "nll_loss": 0.5031787753105164, "rewards/accuracies": 1.0, "rewards/chosen": -0.17039039731025696, "rewards/margins": 0.16691163182258606, "rewards/rejected": -0.337302029132843, "step": 4465 }, { "epoch": 12.227241615331964, "grad_norm": 3.3172457218170166, "learning_rate": 3.882191780821918e-07, "log_odds_chosen": 3.2333292961120605, "log_odds_ratio": -0.10961980372667313, "logits/chosen": 1.204401969909668, "logits/rejected": 1.2383661270141602, "logps/chosen": -1.6920044422149658, "logps/rejected": -4.724338054656982, "loss": 0.4786, "nll_loss": 0.46758899092674255, "rewards/accuracies": 1.0, "rewards/chosen": -0.16920045018196106, "rewards/margins": 0.3032333552837372, "rewards/rejected": -0.47243380546569824, "step": 4466 }, { "epoch": 12.229979466119097, "grad_norm": 3.152784585952759, "learning_rate": 3.880821917808219e-07, "log_odds_chosen": 3.305901050567627, "log_odds_ratio": -0.12666864693164825, "logits/chosen": 0.9836825132369995, "logits/rejected": 0.9591063857078552, "logps/chosen": -1.5667966604232788, "logps/rejected": -4.62545108795166, "loss": 0.5541, "nll_loss": 0.5413835644721985, "rewards/accuracies": 1.0, "rewards/chosen": -0.1566796600818634, "rewards/margins": 0.30586543679237366, "rewards/rejected": -0.46254509687423706, "step": 4467 }, { "epoch": 12.232717316906228, "grad_norm": 3.4874300956726074, "learning_rate": 3.8794520547945204e-07, "log_odds_chosen": 4.834466457366943, "log_odds_ratio": -0.058414116501808167, "logits/chosen": 1.3357467651367188, "logits/rejected": 1.3905575275421143, "logps/chosen": -1.7747294902801514, "logps/rejected": -6.377312183380127, "loss": 0.5279, "nll_loss": 0.5220509767532349, "rewards/accuracies": 1.0, "rewards/chosen": -0.17747294902801514, "rewards/margins": 0.4602583050727844, "rewards/rejected": -0.6377312541007996, "step": 4468 }, { "epoch": 12.235455167693361, "grad_norm": 13.108991622924805, "learning_rate": 3.8780821917808214e-07, "log_odds_chosen": 1.046592354774475, "log_odds_ratio": -0.5939129590988159, "logits/chosen": 1.2700427770614624, "logits/rejected": 1.2158066034317017, "logps/chosen": -3.536085844039917, "logps/rejected": -4.47446870803833, "loss": 0.6465, "nll_loss": 0.5871449708938599, "rewards/accuracies": 0.75, "rewards/chosen": -0.3536085784435272, "rewards/margins": 0.09383831918239594, "rewards/rejected": -0.44744691252708435, "step": 4469 }, { "epoch": 12.238193018480493, "grad_norm": 6.769722938537598, "learning_rate": 3.8767123287671235e-07, "log_odds_chosen": 1.150089979171753, "log_odds_ratio": -0.4715184271335602, "logits/chosen": 0.8732113242149353, "logits/rejected": 0.8505488634109497, "logps/chosen": -2.9029500484466553, "logps/rejected": -3.9699525833129883, "loss": 0.6712, "nll_loss": 0.6240690350532532, "rewards/accuracies": 0.875, "rewards/chosen": -0.2902950048446655, "rewards/margins": 0.10670025646686554, "rewards/rejected": -0.39699530601501465, "step": 4470 }, { "epoch": 12.240930869267626, "grad_norm": 4.211198329925537, "learning_rate": 3.8753424657534244e-07, "log_odds_chosen": 2.939082384109497, "log_odds_ratio": -0.2455921471118927, "logits/chosen": 1.0218584537506104, "logits/rejected": 1.0765008926391602, "logps/chosen": -2.5239498615264893, "logps/rejected": -5.339508056640625, "loss": 0.7295, "nll_loss": 0.7049604058265686, "rewards/accuracies": 0.875, "rewards/chosen": -0.25239500403404236, "rewards/margins": 0.28155583143234253, "rewards/rejected": -0.5339508056640625, "step": 4471 }, { "epoch": 12.243668720054757, "grad_norm": 3.2429556846618652, "learning_rate": 3.873972602739726e-07, "log_odds_chosen": 3.1123316287994385, "log_odds_ratio": -0.2177196443080902, "logits/chosen": 0.9650994539260864, "logits/rejected": 0.9280803203582764, "logps/chosen": -1.5053479671478271, "logps/rejected": -4.349217414855957, "loss": 0.62, "nll_loss": 0.5981818437576294, "rewards/accuracies": 1.0, "rewards/chosen": -0.15053479373455048, "rewards/margins": 0.2843869626522064, "rewards/rejected": -0.4349217414855957, "step": 4472 }, { "epoch": 12.24640657084189, "grad_norm": 3.7283332347869873, "learning_rate": 3.8726027397260275e-07, "log_odds_chosen": 3.112149953842163, "log_odds_ratio": -0.21376781165599823, "logits/chosen": 1.1079163551330566, "logits/rejected": 1.0869653224945068, "logps/chosen": -2.0411250591278076, "logps/rejected": -5.024189472198486, "loss": 0.6479, "nll_loss": 0.6265467405319214, "rewards/accuracies": 1.0, "rewards/chosen": -0.20411249995231628, "rewards/margins": 0.29830649495124817, "rewards/rejected": -0.5024189949035645, "step": 4473 }, { "epoch": 12.24914442162902, "grad_norm": 3.84377121925354, "learning_rate": 3.8712328767123285e-07, "log_odds_chosen": 1.3626680374145508, "log_odds_ratio": -0.34991899132728577, "logits/chosen": 1.257601022720337, "logits/rejected": 1.2486209869384766, "logps/chosen": -1.5995668172836304, "logps/rejected": -2.7928783893585205, "loss": 0.5367, "nll_loss": 0.5017069578170776, "rewards/accuracies": 0.75, "rewards/chosen": -0.159956693649292, "rewards/margins": 0.11933113634586334, "rewards/rejected": -0.27928781509399414, "step": 4474 }, { "epoch": 12.251882272416154, "grad_norm": 3.8910627365112305, "learning_rate": 3.86986301369863e-07, "log_odds_chosen": 2.0080220699310303, "log_odds_ratio": -0.2807375490665436, "logits/chosen": 0.8293533325195312, "logits/rejected": 0.7875736355781555, "logps/chosen": -2.1454877853393555, "logps/rejected": -4.034486770629883, "loss": 0.6246, "nll_loss": 0.5965549945831299, "rewards/accuracies": 0.875, "rewards/chosen": -0.21454879641532898, "rewards/margins": 0.18889984488487244, "rewards/rejected": -0.4034486413002014, "step": 4475 }, { "epoch": 12.254620123203285, "grad_norm": 3.5679807662963867, "learning_rate": 3.868493150684931e-07, "log_odds_chosen": 4.849912166595459, "log_odds_ratio": -0.07834933698177338, "logits/chosen": 1.2622458934783936, "logits/rejected": 1.3209211826324463, "logps/chosen": -2.177100419998169, "logps/rejected": -6.879781246185303, "loss": 0.5418, "nll_loss": 0.5339645743370056, "rewards/accuracies": 1.0, "rewards/chosen": -0.21771004796028137, "rewards/margins": 0.4702680706977844, "rewards/rejected": -0.6879781484603882, "step": 4476 }, { "epoch": 12.257357973990418, "grad_norm": 3.619187116622925, "learning_rate": 3.867123287671233e-07, "log_odds_chosen": 3.1577014923095703, "log_odds_ratio": -0.22967970371246338, "logits/chosen": 1.239762783050537, "logits/rejected": 1.2452740669250488, "logps/chosen": -1.8761733770370483, "logps/rejected": -4.678097248077393, "loss": 0.6248, "nll_loss": 0.6017892360687256, "rewards/accuracies": 0.875, "rewards/chosen": -0.18761733174324036, "rewards/margins": 0.28019237518310547, "rewards/rejected": -0.46780967712402344, "step": 4477 }, { "epoch": 12.260095824777549, "grad_norm": 3.3818047046661377, "learning_rate": 3.865753424657534e-07, "log_odds_chosen": 2.587871551513672, "log_odds_ratio": -0.22857552766799927, "logits/chosen": 1.0234295129776, "logits/rejected": 1.0152769088745117, "logps/chosen": -1.5904996395111084, "logps/rejected": -3.984992504119873, "loss": 0.4787, "nll_loss": 0.45581287145614624, "rewards/accuracies": 1.0, "rewards/chosen": -0.15904995799064636, "rewards/margins": 0.23944929242134094, "rewards/rejected": -0.3984992504119873, "step": 4478 }, { "epoch": 12.262833675564682, "grad_norm": 3.526589870452881, "learning_rate": 3.864383561643835e-07, "log_odds_chosen": 2.8373031616210938, "log_odds_ratio": -0.18767206370830536, "logits/chosen": 0.9961928129196167, "logits/rejected": 1.0286524295806885, "logps/chosen": -1.707511305809021, "logps/rejected": -4.259186744689941, "loss": 0.5456, "nll_loss": 0.5268552303314209, "rewards/accuracies": 0.875, "rewards/chosen": -0.17075112462043762, "rewards/margins": 0.25516757369041443, "rewards/rejected": -0.42591869831085205, "step": 4479 }, { "epoch": 12.265571526351813, "grad_norm": 4.049506187438965, "learning_rate": 3.863013698630137e-07, "log_odds_chosen": 2.4789679050445557, "log_odds_ratio": -0.22959670424461365, "logits/chosen": 0.9950853586196899, "logits/rejected": 1.1210004091262817, "logps/chosen": -2.0383925437927246, "logps/rejected": -4.340679168701172, "loss": 0.5258, "nll_loss": 0.5028759837150574, "rewards/accuracies": 0.875, "rewards/chosen": -0.2038392573595047, "rewards/margins": 0.23022869229316711, "rewards/rejected": -0.434067964553833, "step": 4480 }, { "epoch": 12.268309377138946, "grad_norm": 5.299464702606201, "learning_rate": 3.861643835616438e-07, "log_odds_chosen": 3.151534080505371, "log_odds_ratio": -0.38858941197395325, "logits/chosen": 1.0511994361877441, "logits/rejected": 1.0742310285568237, "logps/chosen": -2.3329739570617676, "logps/rejected": -5.283807277679443, "loss": 0.5462, "nll_loss": 0.5073050856590271, "rewards/accuracies": 0.875, "rewards/chosen": -0.23329739272594452, "rewards/margins": 0.29508331418037415, "rewards/rejected": -0.5283806920051575, "step": 4481 }, { "epoch": 12.271047227926077, "grad_norm": 3.2258565425872803, "learning_rate": 3.8602739726027396e-07, "log_odds_chosen": 3.469846248626709, "log_odds_ratio": -0.14749036729335785, "logits/chosen": 1.2613390684127808, "logits/rejected": 1.2187310457229614, "logps/chosen": -1.3736600875854492, "logps/rejected": -4.500389099121094, "loss": 0.569, "nll_loss": 0.5542970895767212, "rewards/accuracies": 1.0, "rewards/chosen": -0.13736601173877716, "rewards/margins": 0.31267285346984863, "rewards/rejected": -0.4500389099121094, "step": 4482 }, { "epoch": 12.27378507871321, "grad_norm": 3.6296629905700684, "learning_rate": 3.8589041095890406e-07, "log_odds_chosen": 4.046389102935791, "log_odds_ratio": -0.2668735980987549, "logits/chosen": 1.1049302816390991, "logits/rejected": 1.141287922859192, "logps/chosen": -2.055250644683838, "logps/rejected": -5.96114444732666, "loss": 0.6572, "nll_loss": 0.6305044293403625, "rewards/accuracies": 0.875, "rewards/chosen": -0.20552508533000946, "rewards/margins": 0.3905894458293915, "rewards/rejected": -0.5961145162582397, "step": 4483 }, { "epoch": 12.276522929500342, "grad_norm": 5.012442588806152, "learning_rate": 3.8575342465753426e-07, "log_odds_chosen": 1.7151960134506226, "log_odds_ratio": -0.3948807120323181, "logits/chosen": 1.0415583848953247, "logits/rejected": 1.042134404182434, "logps/chosen": -2.669194221496582, "logps/rejected": -4.265560150146484, "loss": 0.6251, "nll_loss": 0.5856530070304871, "rewards/accuracies": 0.75, "rewards/chosen": -0.26691943407058716, "rewards/margins": 0.15963657200336456, "rewards/rejected": -0.4265559911727905, "step": 4484 }, { "epoch": 12.279260780287474, "grad_norm": 3.5671725273132324, "learning_rate": 3.8561643835616436e-07, "log_odds_chosen": 2.4877729415893555, "log_odds_ratio": -0.16958904266357422, "logits/chosen": 1.4768637418746948, "logits/rejected": 1.4184620380401611, "logps/chosen": -1.6342730522155762, "logps/rejected": -3.9400620460510254, "loss": 0.5245, "nll_loss": 0.5075296759605408, "rewards/accuracies": 1.0, "rewards/chosen": -0.16342730820178986, "rewards/margins": 0.23057889938354492, "rewards/rejected": -0.39400625228881836, "step": 4485 }, { "epoch": 12.281998631074606, "grad_norm": 8.407062530517578, "learning_rate": 3.8547945205479446e-07, "log_odds_chosen": 1.7739002704620361, "log_odds_ratio": -0.4557609260082245, "logits/chosen": 0.9419931769371033, "logits/rejected": 0.9324025511741638, "logps/chosen": -2.019416093826294, "logps/rejected": -3.588343858718872, "loss": 0.5608, "nll_loss": 0.5152396559715271, "rewards/accuracies": 0.875, "rewards/chosen": -0.2019415944814682, "rewards/margins": 0.156892791390419, "rewards/rejected": -0.3588343858718872, "step": 4486 }, { "epoch": 12.284736481861739, "grad_norm": 4.641866683959961, "learning_rate": 3.8534246575342467e-07, "log_odds_chosen": 3.0825881958007812, "log_odds_ratio": -0.25071537494659424, "logits/chosen": 0.9664915800094604, "logits/rejected": 0.993333637714386, "logps/chosen": -2.0648646354675293, "logps/rejected": -4.831162452697754, "loss": 0.6689, "nll_loss": 0.6438726186752319, "rewards/accuracies": 0.875, "rewards/chosen": -0.20648646354675293, "rewards/margins": 0.27662980556488037, "rewards/rejected": -0.4831162691116333, "step": 4487 }, { "epoch": 12.28747433264887, "grad_norm": 3.3964452743530273, "learning_rate": 3.8520547945205477e-07, "log_odds_chosen": 1.8243999481201172, "log_odds_ratio": -0.3053627014160156, "logits/chosen": 1.0323232412338257, "logits/rejected": 1.1256356239318848, "logps/chosen": -1.891287088394165, "logps/rejected": -3.58621883392334, "loss": 0.5795, "nll_loss": 0.5489176511764526, "rewards/accuracies": 0.875, "rewards/chosen": -0.18912872672080994, "rewards/margins": 0.16949313879013062, "rewards/rejected": -0.35862183570861816, "step": 4488 }, { "epoch": 12.290212183436003, "grad_norm": 4.618884086608887, "learning_rate": 3.850684931506849e-07, "log_odds_chosen": 1.7636032104492188, "log_odds_ratio": -0.25889503955841064, "logits/chosen": 0.8841838836669922, "logits/rejected": 0.8808298110961914, "logps/chosen": -2.447284698486328, "logps/rejected": -4.102884292602539, "loss": 0.57, "nll_loss": 0.5441457033157349, "rewards/accuracies": 0.875, "rewards/chosen": -0.2447284758090973, "rewards/margins": 0.16555996239185333, "rewards/rejected": -0.41028842329978943, "step": 4489 }, { "epoch": 12.292950034223136, "grad_norm": 6.84762716293335, "learning_rate": 3.84931506849315e-07, "log_odds_chosen": 1.5416464805603027, "log_odds_ratio": -0.3389068841934204, "logits/chosen": 1.0259146690368652, "logits/rejected": 0.9797482490539551, "logps/chosen": -1.8736504316329956, "logps/rejected": -3.194333553314209, "loss": 0.5449, "nll_loss": 0.5110089778900146, "rewards/accuracies": 0.75, "rewards/chosen": -0.18736505508422852, "rewards/margins": 0.13206830620765686, "rewards/rejected": -0.3194333612918854, "step": 4490 }, { "epoch": 12.295687885010267, "grad_norm": 3.294067621231079, "learning_rate": 3.847945205479452e-07, "log_odds_chosen": 3.1157009601593018, "log_odds_ratio": -0.19181585311889648, "logits/chosen": 0.8990899324417114, "logits/rejected": 0.8634514808654785, "logps/chosen": -1.4193825721740723, "logps/rejected": -4.316427230834961, "loss": 0.511, "nll_loss": 0.49178287386894226, "rewards/accuracies": 1.0, "rewards/chosen": -0.14193826913833618, "rewards/margins": 0.28970447182655334, "rewards/rejected": -0.4316427707672119, "step": 4491 }, { "epoch": 12.2984257357974, "grad_norm": 3.787611484527588, "learning_rate": 3.846575342465753e-07, "log_odds_chosen": 1.6026886701583862, "log_odds_ratio": -0.4444901645183563, "logits/chosen": 0.8470669984817505, "logits/rejected": 0.7901525497436523, "logps/chosen": -2.2383875846862793, "logps/rejected": -3.735914707183838, "loss": 0.5852, "nll_loss": 0.5407606363296509, "rewards/accuracies": 0.875, "rewards/chosen": -0.22383877635002136, "rewards/margins": 0.14975270628929138, "rewards/rejected": -0.37359148263931274, "step": 4492 }, { "epoch": 12.301163586584531, "grad_norm": 4.120406627655029, "learning_rate": 3.845205479452054e-07, "log_odds_chosen": 1.9478684663772583, "log_odds_ratio": -0.28812992572784424, "logits/chosen": 0.7825982570648193, "logits/rejected": 0.6753591299057007, "logps/chosen": -2.228635311126709, "logps/rejected": -4.0787272453308105, "loss": 0.6007, "nll_loss": 0.5718587040901184, "rewards/accuracies": 1.0, "rewards/chosen": -0.2228635549545288, "rewards/margins": 0.1850091814994812, "rewards/rejected": -0.40787273645401, "step": 4493 }, { "epoch": 12.303901437371664, "grad_norm": 7.909775733947754, "learning_rate": 3.843835616438356e-07, "log_odds_chosen": 2.2192695140838623, "log_odds_ratio": -0.2756064832210541, "logits/chosen": 0.9956378936767578, "logits/rejected": 0.8655719757080078, "logps/chosen": -1.9222626686096191, "logps/rejected": -3.94561505317688, "loss": 0.6109, "nll_loss": 0.5833315849304199, "rewards/accuracies": 0.875, "rewards/chosen": -0.19222627580165863, "rewards/margins": 0.20233523845672607, "rewards/rejected": -0.3945615291595459, "step": 4494 }, { "epoch": 12.306639288158795, "grad_norm": 4.049422740936279, "learning_rate": 3.842465753424657e-07, "log_odds_chosen": 1.9406760931015015, "log_odds_ratio": -0.27385222911834717, "logits/chosen": 0.778090238571167, "logits/rejected": 0.7337938547134399, "logps/chosen": -1.6705164909362793, "logps/rejected": -3.4480931758880615, "loss": 0.5876, "nll_loss": 0.5602041482925415, "rewards/accuracies": 0.875, "rewards/chosen": -0.16705164313316345, "rewards/margins": 0.17775769531726837, "rewards/rejected": -0.34480932354927063, "step": 4495 }, { "epoch": 12.309377138945928, "grad_norm": 3.1055190563201904, "learning_rate": 3.841095890410959e-07, "log_odds_chosen": 3.5703370571136475, "log_odds_ratio": -0.16064129769802094, "logits/chosen": 1.0459418296813965, "logits/rejected": 1.087342381477356, "logps/chosen": -1.5644986629486084, "logps/rejected": -4.879768371582031, "loss": 0.575, "nll_loss": 0.5589578151702881, "rewards/accuracies": 1.0, "rewards/chosen": -0.15644986927509308, "rewards/margins": 0.3315269649028778, "rewards/rejected": -0.4879768192768097, "step": 4496 }, { "epoch": 12.31211498973306, "grad_norm": 5.4117207527160645, "learning_rate": 3.8397260273972603e-07, "log_odds_chosen": 1.475445032119751, "log_odds_ratio": -0.34520161151885986, "logits/chosen": 1.0753147602081299, "logits/rejected": 1.0919636487960815, "logps/chosen": -2.5571014881134033, "logps/rejected": -3.9372434616088867, "loss": 0.5723, "nll_loss": 0.5377429723739624, "rewards/accuracies": 0.875, "rewards/chosen": -0.2557101547718048, "rewards/margins": 0.13801422715187073, "rewards/rejected": -0.39372438192367554, "step": 4497 }, { "epoch": 12.314852840520192, "grad_norm": 3.935408592224121, "learning_rate": 3.838356164383562e-07, "log_odds_chosen": 1.968634009361267, "log_odds_ratio": -0.22458645701408386, "logits/chosen": 1.1548881530761719, "logits/rejected": 1.0651805400848389, "logps/chosen": -1.3362797498703003, "logps/rejected": -3.069629669189453, "loss": 0.4905, "nll_loss": 0.46803006529808044, "rewards/accuracies": 0.875, "rewards/chosen": -0.1336279809474945, "rewards/margins": 0.1733349859714508, "rewards/rejected": -0.3069629669189453, "step": 4498 }, { "epoch": 12.317590691307323, "grad_norm": 3.846043109893799, "learning_rate": 3.836986301369863e-07, "log_odds_chosen": 1.5763875246047974, "log_odds_ratio": -0.2892848253250122, "logits/chosen": 1.0822588205337524, "logits/rejected": 1.0062438249588013, "logps/chosen": -1.8681325912475586, "logps/rejected": -3.2914795875549316, "loss": 0.5637, "nll_loss": 0.5347898006439209, "rewards/accuracies": 1.0, "rewards/chosen": -0.18681325018405914, "rewards/margins": 0.1423346996307373, "rewards/rejected": -0.32914793491363525, "step": 4499 }, { "epoch": 12.320328542094456, "grad_norm": 3.1296257972717285, "learning_rate": 3.835616438356164e-07, "log_odds_chosen": 5.323884010314941, "log_odds_ratio": -0.1255876123905182, "logits/chosen": 1.0434074401855469, "logits/rejected": 1.0026450157165527, "logps/chosen": -1.607602834701538, "logps/rejected": -6.688068866729736, "loss": 0.4857, "nll_loss": 0.47312673926353455, "rewards/accuracies": 1.0, "rewards/chosen": -0.1607602834701538, "rewards/margins": 0.5080466270446777, "rewards/rejected": -0.6688069105148315, "step": 4500 }, { "epoch": 12.323066392881588, "grad_norm": 3.4871230125427246, "learning_rate": 3.834246575342466e-07, "log_odds_chosen": 2.222151279449463, "log_odds_ratio": -0.2964319586753845, "logits/chosen": 0.7966050505638123, "logits/rejected": 0.7576804161071777, "logps/chosen": -1.5567091703414917, "logps/rejected": -3.6563820838928223, "loss": 0.5138, "nll_loss": 0.4841933250427246, "rewards/accuracies": 0.75, "rewards/chosen": -0.1556709259748459, "rewards/margins": 0.20996731519699097, "rewards/rejected": -0.36563822627067566, "step": 4501 }, { "epoch": 12.32580424366872, "grad_norm": 7.459299087524414, "learning_rate": 3.832876712328767e-07, "log_odds_chosen": 3.0062782764434814, "log_odds_ratio": -0.7820981740951538, "logits/chosen": 1.0402932167053223, "logits/rejected": 1.0804271697998047, "logps/chosen": -2.877159833908081, "logps/rejected": -5.78165340423584, "loss": 0.6388, "nll_loss": 0.5605701208114624, "rewards/accuracies": 0.75, "rewards/chosen": -0.28771597146987915, "rewards/margins": 0.2904493510723114, "rewards/rejected": -0.5781653523445129, "step": 4502 }, { "epoch": 12.328542094455852, "grad_norm": 4.831504821777344, "learning_rate": 3.8315068493150683e-07, "log_odds_chosen": 2.0292274951934814, "log_odds_ratio": -0.3725239932537079, "logits/chosen": 1.0602359771728516, "logits/rejected": 1.1055806875228882, "logps/chosen": -2.6701579093933105, "logps/rejected": -4.602564811706543, "loss": 0.6344, "nll_loss": 0.597135066986084, "rewards/accuracies": 0.75, "rewards/chosen": -0.26701581478118896, "rewards/margins": 0.19324065744876862, "rewards/rejected": -0.4602564871311188, "step": 4503 }, { "epoch": 12.331279945242985, "grad_norm": 3.391169548034668, "learning_rate": 3.83013698630137e-07, "log_odds_chosen": 1.9637384414672852, "log_odds_ratio": -0.2088453322649002, "logits/chosen": 1.1063307523727417, "logits/rejected": 1.1311566829681396, "logps/chosen": -2.139962911605835, "logps/rejected": -3.943824052810669, "loss": 0.5619, "nll_loss": 0.5409768223762512, "rewards/accuracies": 1.0, "rewards/chosen": -0.2139962911605835, "rewards/margins": 0.18038609623908997, "rewards/rejected": -0.39438238739967346, "step": 4504 }, { "epoch": 12.334017796030116, "grad_norm": 3.5183286666870117, "learning_rate": 3.8287671232876714e-07, "log_odds_chosen": 2.7939388751983643, "log_odds_ratio": -0.12062781304121017, "logits/chosen": 0.8917568922042847, "logits/rejected": 0.9561616778373718, "logps/chosen": -2.384324073791504, "logps/rejected": -5.070647239685059, "loss": 0.7371, "nll_loss": 0.7249957323074341, "rewards/accuracies": 1.0, "rewards/chosen": -0.2384324073791504, "rewards/margins": 0.26863229274749756, "rewards/rejected": -0.507064700126648, "step": 4505 }, { "epoch": 12.336755646817249, "grad_norm": 8.062040328979492, "learning_rate": 3.8273972602739724e-07, "log_odds_chosen": 2.7875428199768066, "log_odds_ratio": -0.8108771443367004, "logits/chosen": 1.1964542865753174, "logits/rejected": 1.1623302698135376, "logps/chosen": -2.7261338233947754, "logps/rejected": -5.402679920196533, "loss": 0.6603, "nll_loss": 0.5791906118392944, "rewards/accuracies": 0.75, "rewards/chosen": -0.27261340618133545, "rewards/margins": 0.2676545977592468, "rewards/rejected": -0.5402680039405823, "step": 4506 }, { "epoch": 12.33949349760438, "grad_norm": 3.6129801273345947, "learning_rate": 3.8260273972602734e-07, "log_odds_chosen": 2.7926974296569824, "log_odds_ratio": -0.2531498372554779, "logits/chosen": 0.808251678943634, "logits/rejected": 0.7542084455490112, "logps/chosen": -1.9560999870300293, "logps/rejected": -4.5703654289245605, "loss": 0.572, "nll_loss": 0.54673171043396, "rewards/accuracies": 1.0, "rewards/chosen": -0.19561001658439636, "rewards/margins": 0.26142653822898865, "rewards/rejected": -0.457036554813385, "step": 4507 }, { "epoch": 12.342231348391513, "grad_norm": 3.6671414375305176, "learning_rate": 3.8246575342465754e-07, "log_odds_chosen": 3.1703615188598633, "log_odds_ratio": -0.1389237344264984, "logits/chosen": 0.7866445779800415, "logits/rejected": 0.5569430589675903, "logps/chosen": -1.768418550491333, "logps/rejected": -4.7426252365112305, "loss": 0.5909, "nll_loss": 0.5769660472869873, "rewards/accuracies": 1.0, "rewards/chosen": -0.1768418848514557, "rewards/margins": 0.2974206209182739, "rewards/rejected": -0.4742625057697296, "step": 4508 }, { "epoch": 12.344969199178644, "grad_norm": 4.7378249168396, "learning_rate": 3.8232876712328764e-07, "log_odds_chosen": 2.0403101444244385, "log_odds_ratio": -0.2665000259876251, "logits/chosen": 1.1485052108764648, "logits/rejected": 1.185577630996704, "logps/chosen": -2.6145029067993164, "logps/rejected": -4.5514726638793945, "loss": 0.5535, "nll_loss": 0.5268750190734863, "rewards/accuracies": 0.875, "rewards/chosen": -0.26145029067993164, "rewards/margins": 0.19369691610336304, "rewards/rejected": -0.4551472067832947, "step": 4509 }, { "epoch": 12.347707049965777, "grad_norm": 7.527416229248047, "learning_rate": 3.821917808219178e-07, "log_odds_chosen": 3.0117602348327637, "log_odds_ratio": -0.37155771255493164, "logits/chosen": 0.9266935586929321, "logits/rejected": 0.8750478029251099, "logps/chosen": -2.2769200801849365, "logps/rejected": -5.108590126037598, "loss": 0.6432, "nll_loss": 0.6060302257537842, "rewards/accuracies": 0.875, "rewards/chosen": -0.22769200801849365, "rewards/margins": 0.2831670045852661, "rewards/rejected": -0.5108590126037598, "step": 4510 }, { "epoch": 12.350444900752908, "grad_norm": 14.213190078735352, "learning_rate": 3.8205479452054795e-07, "log_odds_chosen": 1.3901867866516113, "log_odds_ratio": -0.7345693111419678, "logits/chosen": 1.301945447921753, "logits/rejected": 1.2177340984344482, "logps/chosen": -3.210209369659424, "logps/rejected": -4.486166954040527, "loss": 0.7526, "nll_loss": 0.679131269454956, "rewards/accuracies": 0.75, "rewards/chosen": -0.3210209608078003, "rewards/margins": 0.12759575247764587, "rewards/rejected": -0.44861671328544617, "step": 4511 }, { "epoch": 12.353182751540041, "grad_norm": 3.607175588607788, "learning_rate": 3.819178082191781e-07, "log_odds_chosen": 2.984086036682129, "log_odds_ratio": -0.14873750507831573, "logits/chosen": 1.3610420227050781, "logits/rejected": 1.3232501745224, "logps/chosen": -1.6762826442718506, "logps/rejected": -4.452460289001465, "loss": 0.5446, "nll_loss": 0.5297693610191345, "rewards/accuracies": 1.0, "rewards/chosen": -0.16762825846672058, "rewards/margins": 0.27761781215667725, "rewards/rejected": -0.44524604082107544, "step": 4512 }, { "epoch": 12.355920602327172, "grad_norm": 3.802029609680176, "learning_rate": 3.817808219178082e-07, "log_odds_chosen": 2.372427463531494, "log_odds_ratio": -0.2804090976715088, "logits/chosen": 0.9614105224609375, "logits/rejected": 0.9803718328475952, "logps/chosen": -1.5194799900054932, "logps/rejected": -3.7092127799987793, "loss": 0.4866, "nll_loss": 0.4586007595062256, "rewards/accuracies": 0.875, "rewards/chosen": -0.1519480049610138, "rewards/margins": 0.218973308801651, "rewards/rejected": -0.3709213137626648, "step": 4513 }, { "epoch": 12.358658453114305, "grad_norm": 3.7534019947052, "learning_rate": 3.816438356164383e-07, "log_odds_chosen": 2.634047031402588, "log_odds_ratio": -0.19626730680465698, "logits/chosen": 1.3770129680633545, "logits/rejected": 1.406301498413086, "logps/chosen": -1.7119392156600952, "logps/rejected": -4.179769515991211, "loss": 0.466, "nll_loss": 0.44632816314697266, "rewards/accuracies": 1.0, "rewards/chosen": -0.171193927526474, "rewards/margins": 0.246783047914505, "rewards/rejected": -0.417976975440979, "step": 4514 }, { "epoch": 12.361396303901437, "grad_norm": 3.3276569843292236, "learning_rate": 3.815068493150685e-07, "log_odds_chosen": 3.096491813659668, "log_odds_ratio": -0.14284881949424744, "logits/chosen": 1.1519484519958496, "logits/rejected": 1.0870845317840576, "logps/chosen": -1.598698377609253, "logps/rejected": -4.4210920333862305, "loss": 0.4908, "nll_loss": 0.47655001282691956, "rewards/accuracies": 1.0, "rewards/chosen": -0.15986984968185425, "rewards/margins": 0.2822393476963043, "rewards/rejected": -0.44210919737815857, "step": 4515 }, { "epoch": 12.36413415468857, "grad_norm": 3.0022988319396973, "learning_rate": 3.813698630136986e-07, "log_odds_chosen": 2.680103302001953, "log_odds_ratio": -0.15481162071228027, "logits/chosen": 1.1704355478286743, "logits/rejected": 1.1183074712753296, "logps/chosen": -1.796910285949707, "logps/rejected": -4.2534990310668945, "loss": 0.4735, "nll_loss": 0.45801547169685364, "rewards/accuracies": 1.0, "rewards/chosen": -0.17969103157520294, "rewards/margins": 0.24565884470939636, "rewards/rejected": -0.4253498911857605, "step": 4516 }, { "epoch": 12.366872005475702, "grad_norm": 3.9456686973571777, "learning_rate": 3.8123287671232875e-07, "log_odds_chosen": 4.25923490524292, "log_odds_ratio": -0.06694852560758591, "logits/chosen": 1.3613660335540771, "logits/rejected": 1.4524836540222168, "logps/chosen": -1.939035177230835, "logps/rejected": -6.0047454833984375, "loss": 0.6426, "nll_loss": 0.6358932256698608, "rewards/accuracies": 1.0, "rewards/chosen": -0.19390350580215454, "rewards/margins": 0.40657100081443787, "rewards/rejected": -0.6004745364189148, "step": 4517 }, { "epoch": 12.369609856262834, "grad_norm": 3.603935718536377, "learning_rate": 3.810958904109589e-07, "log_odds_chosen": 0.9777915477752686, "log_odds_ratio": -0.4173695147037506, "logits/chosen": 1.2136129140853882, "logits/rejected": 1.1709072589874268, "logps/chosen": -2.2383434772491455, "logps/rejected": -3.14648175239563, "loss": 0.5765, "nll_loss": 0.5347567200660706, "rewards/accuracies": 0.875, "rewards/chosen": -0.22383436560630798, "rewards/margins": 0.09081382304430008, "rewards/rejected": -0.31464818120002747, "step": 4518 }, { "epoch": 12.372347707049967, "grad_norm": 4.205704689025879, "learning_rate": 3.8095890410958906e-07, "log_odds_chosen": 1.3538752794265747, "log_odds_ratio": -0.3402026295661926, "logits/chosen": 1.0904661417007446, "logits/rejected": 1.0109244585037231, "logps/chosen": -1.5242469310760498, "logps/rejected": -2.6655967235565186, "loss": 0.4712, "nll_loss": 0.43721574544906616, "rewards/accuracies": 0.75, "rewards/chosen": -0.15242469310760498, "rewards/margins": 0.11413497477769852, "rewards/rejected": -0.2665596604347229, "step": 4519 }, { "epoch": 12.375085557837098, "grad_norm": 3.7127139568328857, "learning_rate": 3.8082191780821916e-07, "log_odds_chosen": 1.4306467771530151, "log_odds_ratio": -0.4078967571258545, "logits/chosen": 0.6553347706794739, "logits/rejected": 0.7069696187973022, "logps/chosen": -1.4183590412139893, "logps/rejected": -2.6368541717529297, "loss": 0.5665, "nll_loss": 0.5257598757743835, "rewards/accuracies": 0.75, "rewards/chosen": -0.14183589816093445, "rewards/margins": 0.12184952199459076, "rewards/rejected": -0.2636854350566864, "step": 4520 }, { "epoch": 12.37782340862423, "grad_norm": 3.6527750492095947, "learning_rate": 3.8068493150684925e-07, "log_odds_chosen": 1.668673038482666, "log_odds_ratio": -0.31063583493232727, "logits/chosen": 1.1642913818359375, "logits/rejected": 1.1224724054336548, "logps/chosen": -2.0483641624450684, "logps/rejected": -3.612422466278076, "loss": 0.5378, "nll_loss": 0.5067673921585083, "rewards/accuracies": 1.0, "rewards/chosen": -0.2048364281654358, "rewards/margins": 0.15640583634376526, "rewards/rejected": -0.36124223470687866, "step": 4521 }, { "epoch": 12.380561259411362, "grad_norm": 3.8828065395355225, "learning_rate": 3.8054794520547946e-07, "log_odds_chosen": 1.7233614921569824, "log_odds_ratio": -0.35031941533088684, "logits/chosen": 1.1602938175201416, "logits/rejected": 1.1354639530181885, "logps/chosen": -2.0937771797180176, "logps/rejected": -3.708536148071289, "loss": 0.552, "nll_loss": 0.5170020461082458, "rewards/accuracies": 1.0, "rewards/chosen": -0.2093777358531952, "rewards/margins": 0.16147586703300476, "rewards/rejected": -0.37085360288619995, "step": 4522 }, { "epoch": 12.383299110198495, "grad_norm": 3.6592302322387695, "learning_rate": 3.8041095890410956e-07, "log_odds_chosen": 3.9257924556732178, "log_odds_ratio": -0.10693712532520294, "logits/chosen": 1.4109432697296143, "logits/rejected": 1.473433494567871, "logps/chosen": -2.0539355278015137, "logps/rejected": -5.807778358459473, "loss": 0.4861, "nll_loss": 0.4754079580307007, "rewards/accuracies": 1.0, "rewards/chosen": -0.20539356768131256, "rewards/margins": 0.37538430094718933, "rewards/rejected": -0.5807778835296631, "step": 4523 }, { "epoch": 12.386036960985626, "grad_norm": 3.6119327545166016, "learning_rate": 3.802739726027397e-07, "log_odds_chosen": 2.2782201766967773, "log_odds_ratio": -0.21241594851016998, "logits/chosen": 0.8958067893981934, "logits/rejected": 0.9117805361747742, "logps/chosen": -2.2710208892822266, "logps/rejected": -4.357634544372559, "loss": 0.5395, "nll_loss": 0.5182985663414001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2271021008491516, "rewards/margins": 0.20866137742996216, "rewards/rejected": -0.43576350808143616, "step": 4524 }, { "epoch": 12.388774811772759, "grad_norm": 3.780898332595825, "learning_rate": 3.8013698630136986e-07, "log_odds_chosen": 2.59100341796875, "log_odds_ratio": -0.228115975856781, "logits/chosen": 0.9809694886207581, "logits/rejected": 0.9126570224761963, "logps/chosen": -1.5014840364456177, "logps/rejected": -3.8900580406188965, "loss": 0.4745, "nll_loss": 0.451709121465683, "rewards/accuracies": 1.0, "rewards/chosen": -0.150148406624794, "rewards/margins": 0.23885738849639893, "rewards/rejected": -0.3890058100223541, "step": 4525 }, { "epoch": 12.39151266255989, "grad_norm": 3.9424502849578857, "learning_rate": 3.7999999999999996e-07, "log_odds_chosen": 4.2688188552856445, "log_odds_ratio": -0.14615467190742493, "logits/chosen": 0.9633760452270508, "logits/rejected": 1.018375039100647, "logps/chosen": -1.3611125946044922, "logps/rejected": -5.248347282409668, "loss": 0.5467, "nll_loss": 0.5320917367935181, "rewards/accuracies": 1.0, "rewards/chosen": -0.1361112743616104, "rewards/margins": 0.3887234330177307, "rewards/rejected": -0.5248346924781799, "step": 4526 }, { "epoch": 12.394250513347023, "grad_norm": 4.888333797454834, "learning_rate": 3.798630136986301e-07, "log_odds_chosen": 2.1905364990234375, "log_odds_ratio": -0.24699586629867554, "logits/chosen": 0.8886842131614685, "logits/rejected": 0.8525137305259705, "logps/chosen": -1.8283292055130005, "logps/rejected": -3.8684444427490234, "loss": 0.65, "nll_loss": 0.625304102897644, "rewards/accuracies": 1.0, "rewards/chosen": -0.18283292651176453, "rewards/margins": 0.20401151478290558, "rewards/rejected": -0.3868444561958313, "step": 4527 }, { "epoch": 12.396988364134154, "grad_norm": 4.090712070465088, "learning_rate": 3.797260273972602e-07, "log_odds_chosen": 2.4848129749298096, "log_odds_ratio": -0.23903177678585052, "logits/chosen": 1.0519232749938965, "logits/rejected": 0.9211198091506958, "logps/chosen": -1.141886591911316, "logps/rejected": -3.3283469676971436, "loss": 0.5027, "nll_loss": 0.4788393974304199, "rewards/accuracies": 1.0, "rewards/chosen": -0.11418866366147995, "rewards/margins": 0.21864603459835052, "rewards/rejected": -0.33283472061157227, "step": 4528 }, { "epoch": 12.399726214921287, "grad_norm": 3.072796106338501, "learning_rate": 3.795890410958904e-07, "log_odds_chosen": 2.7697179317474365, "log_odds_ratio": -0.11410116404294968, "logits/chosen": 1.287338376045227, "logits/rejected": 1.2995545864105225, "logps/chosen": -1.7900493144989014, "logps/rejected": -4.367836952209473, "loss": 0.4822, "nll_loss": 0.4708319306373596, "rewards/accuracies": 1.0, "rewards/chosen": -0.17900492250919342, "rewards/margins": 0.25777873396873474, "rewards/rejected": -0.43678364157676697, "step": 4529 }, { "epoch": 12.402464065708418, "grad_norm": 3.229111909866333, "learning_rate": 3.794520547945205e-07, "log_odds_chosen": 3.1032862663269043, "log_odds_ratio": -0.12190581113100052, "logits/chosen": 0.9084864258766174, "logits/rejected": 0.8588390350341797, "logps/chosen": -1.9500839710235596, "logps/rejected": -4.8687944412231445, "loss": 0.6037, "nll_loss": 0.5914869904518127, "rewards/accuracies": 1.0, "rewards/chosen": -0.19500839710235596, "rewards/margins": 0.291871041059494, "rewards/rejected": -0.4868794083595276, "step": 4530 }, { "epoch": 12.405201916495551, "grad_norm": 3.5048842430114746, "learning_rate": 3.7931506849315067e-07, "log_odds_chosen": 2.084730625152588, "log_odds_ratio": -0.22353112697601318, "logits/chosen": 1.1253983974456787, "logits/rejected": 1.097773790359497, "logps/chosen": -1.6020753383636475, "logps/rejected": -3.5133001804351807, "loss": 0.4545, "nll_loss": 0.4321337342262268, "rewards/accuracies": 1.0, "rewards/chosen": -0.16020753979682922, "rewards/margins": 0.19112247228622437, "rewards/rejected": -0.3513299822807312, "step": 4531 }, { "epoch": 12.407939767282683, "grad_norm": 4.284119606018066, "learning_rate": 3.791780821917808e-07, "log_odds_chosen": 2.5823330879211426, "log_odds_ratio": -0.24081125855445862, "logits/chosen": 0.8960263729095459, "logits/rejected": 0.8001417517662048, "logps/chosen": -1.3183046579360962, "logps/rejected": -3.653757095336914, "loss": 0.6187, "nll_loss": 0.5946572422981262, "rewards/accuracies": 1.0, "rewards/chosen": -0.13183046877384186, "rewards/margins": 0.23354525864124298, "rewards/rejected": -0.36537569761276245, "step": 4532 }, { "epoch": 12.410677618069816, "grad_norm": 4.010059356689453, "learning_rate": 3.790410958904109e-07, "log_odds_chosen": 3.089796304702759, "log_odds_ratio": -0.3599244952201843, "logits/chosen": 1.2968223094940186, "logits/rejected": 1.283555507659912, "logps/chosen": -2.202430248260498, "logps/rejected": -5.222427845001221, "loss": 0.5803, "nll_loss": 0.5443065762519836, "rewards/accuracies": 0.875, "rewards/chosen": -0.22024303674697876, "rewards/margins": 0.3019997775554657, "rewards/rejected": -0.5222427845001221, "step": 4533 }, { "epoch": 12.413415468856947, "grad_norm": 5.06363582611084, "learning_rate": 3.7890410958904107e-07, "log_odds_chosen": 0.169939786195755, "log_odds_ratio": -0.7948639392852783, "logits/chosen": 1.270738124847412, "logits/rejected": 1.2727041244506836, "logps/chosen": -2.2916693687438965, "logps/rejected": -2.3847789764404297, "loss": 0.6025, "nll_loss": 0.5230399966239929, "rewards/accuracies": 0.75, "rewards/chosen": -0.2291669249534607, "rewards/margins": 0.009310968220233917, "rewards/rejected": -0.238477885723114, "step": 4534 }, { "epoch": 12.41615331964408, "grad_norm": 4.1160569190979, "learning_rate": 3.787671232876712e-07, "log_odds_chosen": 2.9361538887023926, "log_odds_ratio": -0.15144486725330353, "logits/chosen": 1.261246681213379, "logits/rejected": 1.2865996360778809, "logps/chosen": -2.109104633331299, "logps/rejected": -4.814465522766113, "loss": 0.5054, "nll_loss": 0.49025797843933105, "rewards/accuracies": 1.0, "rewards/chosen": -0.21091043949127197, "rewards/margins": 0.2705360949039459, "rewards/rejected": -0.4814465641975403, "step": 4535 }, { "epoch": 12.41889117043121, "grad_norm": 3.6453611850738525, "learning_rate": 3.786301369863014e-07, "log_odds_chosen": 2.7122156620025635, "log_odds_ratio": -0.18039856851100922, "logits/chosen": 1.091666579246521, "logits/rejected": 1.076917290687561, "logps/chosen": -1.6824795007705688, "logps/rejected": -4.19559383392334, "loss": 0.4648, "nll_loss": 0.44679898023605347, "rewards/accuracies": 1.0, "rewards/chosen": -0.16824795305728912, "rewards/margins": 0.25131142139434814, "rewards/rejected": -0.41955938935279846, "step": 4536 }, { "epoch": 12.421629021218344, "grad_norm": 3.583806037902832, "learning_rate": 3.784931506849315e-07, "log_odds_chosen": 3.0278897285461426, "log_odds_ratio": -0.21822258830070496, "logits/chosen": 1.024139404296875, "logits/rejected": 1.0307620763778687, "logps/chosen": -1.7519619464874268, "logps/rejected": -4.640393257141113, "loss": 0.6493, "nll_loss": 0.6274815201759338, "rewards/accuracies": 1.0, "rewards/chosen": -0.17519618570804596, "rewards/margins": 0.28884315490722656, "rewards/rejected": -0.4640393555164337, "step": 4537 }, { "epoch": 12.424366872005475, "grad_norm": 3.8080923557281494, "learning_rate": 3.7835616438356163e-07, "log_odds_chosen": 3.3449416160583496, "log_odds_ratio": -0.3339289426803589, "logits/chosen": 0.7732088565826416, "logits/rejected": 0.7040276527404785, "logps/chosen": -2.200007200241089, "logps/rejected": -5.277334213256836, "loss": 0.6555, "nll_loss": 0.6221548318862915, "rewards/accuracies": 0.875, "rewards/chosen": -0.2200007289648056, "rewards/margins": 0.3077327013015747, "rewards/rejected": -0.5277334451675415, "step": 4538 }, { "epoch": 12.427104722792608, "grad_norm": 3.480586290359497, "learning_rate": 3.782191780821918e-07, "log_odds_chosen": 1.910921335220337, "log_odds_ratio": -0.30424708127975464, "logits/chosen": 0.6846790313720703, "logits/rejected": 0.6768971681594849, "logps/chosen": -2.0389046669006348, "logps/rejected": -3.8733696937561035, "loss": 0.5116, "nll_loss": 0.4811548888683319, "rewards/accuracies": 1.0, "rewards/chosen": -0.20389047265052795, "rewards/margins": 0.18344652652740479, "rewards/rejected": -0.38733699917793274, "step": 4539 }, { "epoch": 12.429842573579739, "grad_norm": 3.41747784614563, "learning_rate": 3.780821917808219e-07, "log_odds_chosen": 2.4365389347076416, "log_odds_ratio": -0.24346427619457245, "logits/chosen": 1.1094260215759277, "logits/rejected": 1.091187834739685, "logps/chosen": -1.7917845249176025, "logps/rejected": -4.084488868713379, "loss": 0.5382, "nll_loss": 0.5138876438140869, "rewards/accuracies": 1.0, "rewards/chosen": -0.17917846143245697, "rewards/margins": 0.22927044332027435, "rewards/rejected": -0.40844887495040894, "step": 4540 }, { "epoch": 12.432580424366872, "grad_norm": 5.358498573303223, "learning_rate": 3.7794520547945203e-07, "log_odds_chosen": 2.540036678314209, "log_odds_ratio": -0.3164560794830322, "logits/chosen": 0.8922686576843262, "logits/rejected": 0.8523668646812439, "logps/chosen": -2.7312233448028564, "logps/rejected": -5.0751776695251465, "loss": 0.5797, "nll_loss": 0.5480916500091553, "rewards/accuracies": 0.75, "rewards/chosen": -0.2731223404407501, "rewards/margins": 0.23439544439315796, "rewards/rejected": -0.5075178146362305, "step": 4541 }, { "epoch": 12.435318275154003, "grad_norm": 5.96935510635376, "learning_rate": 3.778082191780822e-07, "log_odds_chosen": 3.010854721069336, "log_odds_ratio": -0.1850530058145523, "logits/chosen": 0.9363583922386169, "logits/rejected": 0.8765846490859985, "logps/chosen": -1.7186038494110107, "logps/rejected": -4.463559150695801, "loss": 0.5372, "nll_loss": 0.5187373161315918, "rewards/accuracies": 1.0, "rewards/chosen": -0.17186036705970764, "rewards/margins": 0.27449557185173035, "rewards/rejected": -0.446355938911438, "step": 4542 }, { "epoch": 12.438056125941136, "grad_norm": 3.6235482692718506, "learning_rate": 3.7767123287671234e-07, "log_odds_chosen": 2.7269134521484375, "log_odds_ratio": -0.14593812823295593, "logits/chosen": 0.8843331933021545, "logits/rejected": 0.8382797837257385, "logps/chosen": -1.2250001430511475, "logps/rejected": -3.470621347427368, "loss": 0.4991, "nll_loss": 0.4844818711280823, "rewards/accuracies": 1.0, "rewards/chosen": -0.12250001728534698, "rewards/margins": 0.2245621383190155, "rewards/rejected": -0.3470621705055237, "step": 4543 }, { "epoch": 12.44079397672827, "grad_norm": 3.7257039546966553, "learning_rate": 3.7753424657534243e-07, "log_odds_chosen": 2.0753071308135986, "log_odds_ratio": -0.238951176404953, "logits/chosen": 0.713455319404602, "logits/rejected": 0.7340158224105835, "logps/chosen": -1.3414992094039917, "logps/rejected": -3.1647937297821045, "loss": 0.4764, "nll_loss": 0.45251554250717163, "rewards/accuracies": 1.0, "rewards/chosen": -0.1341499388217926, "rewards/margins": 0.18232941627502441, "rewards/rejected": -0.3164793848991394, "step": 4544 }, { "epoch": 12.4435318275154, "grad_norm": 9.02429485321045, "learning_rate": 3.773972602739726e-07, "log_odds_chosen": 0.8689558506011963, "log_odds_ratio": -0.682676374912262, "logits/chosen": 1.0181273221969604, "logits/rejected": 1.0196901559829712, "logps/chosen": -3.265468120574951, "logps/rejected": -4.042358875274658, "loss": 0.6976, "nll_loss": 0.6293014287948608, "rewards/accuracies": 0.75, "rewards/chosen": -0.3265468180179596, "rewards/margins": 0.07768909633159637, "rewards/rejected": -0.4042358994483948, "step": 4545 }, { "epoch": 12.446269678302533, "grad_norm": 3.445909023284912, "learning_rate": 3.7726027397260274e-07, "log_odds_chosen": 2.5998592376708984, "log_odds_ratio": -0.14794138073921204, "logits/chosen": 0.8323752880096436, "logits/rejected": 0.7433928847312927, "logps/chosen": -1.4708960056304932, "logps/rejected": -3.7924270629882812, "loss": 0.4701, "nll_loss": 0.4553394913673401, "rewards/accuracies": 1.0, "rewards/chosen": -0.14708960056304932, "rewards/margins": 0.23215311765670776, "rewards/rejected": -0.3792427182197571, "step": 4546 }, { "epoch": 12.449007529089664, "grad_norm": 4.831733226776123, "learning_rate": 3.7712328767123284e-07, "log_odds_chosen": 1.983384370803833, "log_odds_ratio": -0.39023613929748535, "logits/chosen": 1.1618655920028687, "logits/rejected": 1.1075693368911743, "logps/chosen": -2.208974838256836, "logps/rejected": -4.150625228881836, "loss": 0.5619, "nll_loss": 0.5229062438011169, "rewards/accuracies": 0.75, "rewards/chosen": -0.22089746594429016, "rewards/margins": 0.19416505098342896, "rewards/rejected": -0.4150625467300415, "step": 4547 }, { "epoch": 12.451745379876797, "grad_norm": 3.5207531452178955, "learning_rate": 3.76986301369863e-07, "log_odds_chosen": 2.8977513313293457, "log_odds_ratio": -0.22158999741077423, "logits/chosen": 0.9924982786178589, "logits/rejected": 1.0257441997528076, "logps/chosen": -1.8815569877624512, "logps/rejected": -4.580784320831299, "loss": 0.5664, "nll_loss": 0.5442366003990173, "rewards/accuracies": 0.875, "rewards/chosen": -0.18815571069717407, "rewards/margins": 0.26992273330688477, "rewards/rejected": -0.45807841420173645, "step": 4548 }, { "epoch": 12.454483230663929, "grad_norm": 4.52630615234375, "learning_rate": 3.7684931506849314e-07, "log_odds_chosen": 2.9290809631347656, "log_odds_ratio": -0.2538216710090637, "logits/chosen": 0.9027860760688782, "logits/rejected": 0.9563485383987427, "logps/chosen": -2.232027530670166, "logps/rejected": -5.032171249389648, "loss": 0.7559, "nll_loss": 0.7304825782775879, "rewards/accuracies": 0.875, "rewards/chosen": -0.22320273518562317, "rewards/margins": 0.28001436591148376, "rewards/rejected": -0.5032171010971069, "step": 4549 }, { "epoch": 12.457221081451062, "grad_norm": 3.632845640182495, "learning_rate": 3.767123287671233e-07, "log_odds_chosen": 3.0268890857696533, "log_odds_ratio": -0.12176232039928436, "logits/chosen": 1.122359037399292, "logits/rejected": 1.0971462726593018, "logps/chosen": -2.010197639465332, "logps/rejected": -4.82512092590332, "loss": 0.4861, "nll_loss": 0.4739172160625458, "rewards/accuracies": 1.0, "rewards/chosen": -0.2010197788476944, "rewards/margins": 0.28149232268333435, "rewards/rejected": -0.48251211643218994, "step": 4550 }, { "epoch": 12.459958932238193, "grad_norm": 3.764770984649658, "learning_rate": 3.765753424657534e-07, "log_odds_chosen": 2.887704849243164, "log_odds_ratio": -0.1564774364233017, "logits/chosen": 1.2363693714141846, "logits/rejected": 1.3225345611572266, "logps/chosen": -1.887601375579834, "logps/rejected": -4.441957473754883, "loss": 0.5129, "nll_loss": 0.497212290763855, "rewards/accuracies": 1.0, "rewards/chosen": -0.18876013159751892, "rewards/margins": 0.25543564558029175, "rewards/rejected": -0.44419580698013306, "step": 4551 }, { "epoch": 12.462696783025326, "grad_norm": 3.4830322265625, "learning_rate": 3.7643835616438355e-07, "log_odds_chosen": 2.6164560317993164, "log_odds_ratio": -0.1439298540353775, "logits/chosen": 0.8745746612548828, "logits/rejected": 0.8505532741546631, "logps/chosen": -1.54103684425354, "logps/rejected": -3.8768227100372314, "loss": 0.5485, "nll_loss": 0.5340924859046936, "rewards/accuracies": 1.0, "rewards/chosen": -0.15410368144512177, "rewards/margins": 0.23357856273651123, "rewards/rejected": -0.3876822590827942, "step": 4552 }, { "epoch": 12.465434633812457, "grad_norm": 3.325430154800415, "learning_rate": 3.763013698630137e-07, "log_odds_chosen": 3.178116798400879, "log_odds_ratio": -0.09911312162876129, "logits/chosen": 1.1303308010101318, "logits/rejected": 1.1037929058074951, "logps/chosen": -2.3115482330322266, "logps/rejected": -5.358813285827637, "loss": 0.5622, "nll_loss": 0.5522927045822144, "rewards/accuracies": 1.0, "rewards/chosen": -0.23115482926368713, "rewards/margins": 0.3047264814376831, "rewards/rejected": -0.5358813405036926, "step": 4553 }, { "epoch": 12.46817248459959, "grad_norm": 3.54677677154541, "learning_rate": 3.761643835616438e-07, "log_odds_chosen": 3.772291660308838, "log_odds_ratio": -0.15605498850345612, "logits/chosen": 1.0397082567214966, "logits/rejected": 0.9670215249061584, "logps/chosen": -1.6242340803146362, "logps/rejected": -5.186642646789551, "loss": 0.6448, "nll_loss": 0.6291748285293579, "rewards/accuracies": 1.0, "rewards/chosen": -0.16242341697216034, "rewards/margins": 0.3562408685684204, "rewards/rejected": -0.5186642408370972, "step": 4554 }, { "epoch": 12.470910335386721, "grad_norm": 8.882152557373047, "learning_rate": 3.7602739726027395e-07, "log_odds_chosen": 1.6567484140396118, "log_odds_ratio": -0.7896050810813904, "logits/chosen": 0.9442587494850159, "logits/rejected": 0.9555825591087341, "logps/chosen": -2.640106678009033, "logps/rejected": -4.11025333404541, "loss": 0.7046, "nll_loss": 0.6256058216094971, "rewards/accuracies": 0.875, "rewards/chosen": -0.2640106678009033, "rewards/margins": 0.14701467752456665, "rewards/rejected": -0.41102534532546997, "step": 4555 }, { "epoch": 12.473648186173854, "grad_norm": 6.818126201629639, "learning_rate": 3.758904109589041e-07, "log_odds_chosen": 5.667399883270264, "log_odds_ratio": -0.15255406498908997, "logits/chosen": 1.1471288204193115, "logits/rejected": 1.1445292234420776, "logps/chosen": -2.2814278602600098, "logps/rejected": -7.80659818649292, "loss": 0.5829, "nll_loss": 0.5676221251487732, "rewards/accuracies": 1.0, "rewards/chosen": -0.22814278304576874, "rewards/margins": 0.5525170564651489, "rewards/rejected": -0.7806598544120789, "step": 4556 }, { "epoch": 12.476386036960985, "grad_norm": 3.1915698051452637, "learning_rate": 3.7575342465753425e-07, "log_odds_chosen": 3.816634178161621, "log_odds_ratio": -0.09496016055345535, "logits/chosen": 1.1696372032165527, "logits/rejected": 1.2184209823608398, "logps/chosen": -2.415318250656128, "logps/rejected": -6.026185989379883, "loss": 0.5797, "nll_loss": 0.5702325701713562, "rewards/accuracies": 1.0, "rewards/chosen": -0.2415318489074707, "rewards/margins": 0.36108678579330444, "rewards/rejected": -0.6026186347007751, "step": 4557 }, { "epoch": 12.479123887748118, "grad_norm": 4.293397903442383, "learning_rate": 3.7561643835616435e-07, "log_odds_chosen": 3.594862461090088, "log_odds_ratio": -0.19805040955543518, "logits/chosen": 1.4533319473266602, "logits/rejected": 1.4917171001434326, "logps/chosen": -2.642306327819824, "logps/rejected": -6.154666423797607, "loss": 0.6038, "nll_loss": 0.5839932560920715, "rewards/accuracies": 0.875, "rewards/chosen": -0.2642306387424469, "rewards/margins": 0.3512360453605652, "rewards/rejected": -0.6154667139053345, "step": 4558 }, { "epoch": 12.48186173853525, "grad_norm": 5.680967330932617, "learning_rate": 3.7547945205479445e-07, "log_odds_chosen": 2.738508939743042, "log_odds_ratio": -0.40186649560928345, "logits/chosen": 1.0619065761566162, "logits/rejected": 1.1079506874084473, "logps/chosen": -2.3733673095703125, "logps/rejected": -4.884788513183594, "loss": 0.6388, "nll_loss": 0.5986031889915466, "rewards/accuracies": 0.75, "rewards/chosen": -0.23733673989772797, "rewards/margins": 0.25114211440086365, "rewards/rejected": -0.4884788393974304, "step": 4559 }, { "epoch": 12.484599589322382, "grad_norm": 3.571817636489868, "learning_rate": 3.7534246575342466e-07, "log_odds_chosen": 1.7290056943893433, "log_odds_ratio": -0.2816329598426819, "logits/chosen": 0.8785895109176636, "logits/rejected": 0.7909265756607056, "logps/chosen": -1.8389735221862793, "logps/rejected": -3.350900650024414, "loss": 0.5716, "nll_loss": 0.5434865355491638, "rewards/accuracies": 0.875, "rewards/chosen": -0.18389734625816345, "rewards/margins": 0.15119273960590363, "rewards/rejected": -0.33509010076522827, "step": 4560 }, { "epoch": 12.487337440109513, "grad_norm": 4.504017353057861, "learning_rate": 3.7520547945205475e-07, "log_odds_chosen": 2.1144495010375977, "log_odds_ratio": -0.18713343143463135, "logits/chosen": 1.2973960638046265, "logits/rejected": 1.2574725151062012, "logps/chosen": -1.799668788909912, "logps/rejected": -3.742654323577881, "loss": 0.5732, "nll_loss": 0.5544984340667725, "rewards/accuracies": 1.0, "rewards/chosen": -0.17996688187122345, "rewards/margins": 0.19429855048656464, "rewards/rejected": -0.3742654323577881, "step": 4561 }, { "epoch": 12.490075290896646, "grad_norm": 3.5690252780914307, "learning_rate": 3.750684931506849e-07, "log_odds_chosen": 1.4240517616271973, "log_odds_ratio": -0.30517446994781494, "logits/chosen": 1.0600800514221191, "logits/rejected": 1.0291829109191895, "logps/chosen": -1.7090190649032593, "logps/rejected": -2.9238836765289307, "loss": 0.4987, "nll_loss": 0.4682082235813141, "rewards/accuracies": 1.0, "rewards/chosen": -0.17090190947055817, "rewards/margins": 0.12148647010326385, "rewards/rejected": -0.292388379573822, "step": 4562 }, { "epoch": 12.492813141683778, "grad_norm": 3.4868578910827637, "learning_rate": 3.7493150684931506e-07, "log_odds_chosen": 1.9716382026672363, "log_odds_ratio": -0.16960377991199493, "logits/chosen": 1.052966833114624, "logits/rejected": 0.9929816126823425, "logps/chosen": -2.331176996231079, "logps/rejected": -4.134387969970703, "loss": 0.565, "nll_loss": 0.5480403900146484, "rewards/accuracies": 1.0, "rewards/chosen": -0.23311768472194672, "rewards/margins": 0.1803211122751236, "rewards/rejected": -0.4134387969970703, "step": 4563 }, { "epoch": 12.49555099247091, "grad_norm": 5.088047981262207, "learning_rate": 3.747945205479452e-07, "log_odds_chosen": 2.829350471496582, "log_odds_ratio": -0.21449482440948486, "logits/chosen": 1.0511828660964966, "logits/rejected": 1.027434229850769, "logps/chosen": -1.6427733898162842, "logps/rejected": -4.220184326171875, "loss": 0.5631, "nll_loss": 0.5416263341903687, "rewards/accuracies": 0.875, "rewards/chosen": -0.1642773449420929, "rewards/margins": 0.2577410936355591, "rewards/rejected": -0.42201846837997437, "step": 4564 }, { "epoch": 12.498288843258042, "grad_norm": 3.5549399852752686, "learning_rate": 3.746575342465753e-07, "log_odds_chosen": 2.63124418258667, "log_odds_ratio": -0.25718092918395996, "logits/chosen": 1.062868356704712, "logits/rejected": 0.9998639225959778, "logps/chosen": -1.7059745788574219, "logps/rejected": -4.19962739944458, "loss": 0.5762, "nll_loss": 0.5504650473594666, "rewards/accuracies": 1.0, "rewards/chosen": -0.17059746384620667, "rewards/margins": 0.24936529994010925, "rewards/rejected": -0.41996270418167114, "step": 4565 }, { "epoch": 12.501026694045175, "grad_norm": 3.636784553527832, "learning_rate": 3.745205479452055e-07, "log_odds_chosen": 1.4511221647262573, "log_odds_ratio": -0.3105342984199524, "logits/chosen": 1.1373640298843384, "logits/rejected": 1.0045527219772339, "logps/chosen": -1.643902063369751, "logps/rejected": -2.9640588760375977, "loss": 0.5628, "nll_loss": 0.531764805316925, "rewards/accuracies": 1.0, "rewards/chosen": -0.16439023613929749, "rewards/margins": 0.13201569020748138, "rewards/rejected": -0.2964059114456177, "step": 4566 }, { "epoch": 12.503764544832306, "grad_norm": 4.486915111541748, "learning_rate": 3.743835616438356e-07, "log_odds_chosen": 2.9419360160827637, "log_odds_ratio": -0.18569326400756836, "logits/chosen": 1.2799936532974243, "logits/rejected": 1.3156988620758057, "logps/chosen": -2.184565544128418, "logps/rejected": -4.994018077850342, "loss": 0.5727, "nll_loss": 0.5541365742683411, "rewards/accuracies": 0.875, "rewards/chosen": -0.21845653653144836, "rewards/margins": 0.2809452712535858, "rewards/rejected": -0.49940186738967896, "step": 4567 }, { "epoch": 12.506502395619439, "grad_norm": 3.2474007606506348, "learning_rate": 3.742465753424657e-07, "log_odds_chosen": 5.512304306030273, "log_odds_ratio": -0.14947640895843506, "logits/chosen": 1.1952259540557861, "logits/rejected": 1.199524998664856, "logps/chosen": -1.7284178733825684, "logps/rejected": -6.980438232421875, "loss": 0.5511, "nll_loss": 0.5361588001251221, "rewards/accuracies": 1.0, "rewards/chosen": -0.17284178733825684, "rewards/margins": 0.5252020955085754, "rewards/rejected": -0.6980438828468323, "step": 4568 }, { "epoch": 12.50924024640657, "grad_norm": 3.9415087699890137, "learning_rate": 3.7410958904109587e-07, "log_odds_chosen": 3.7194833755493164, "log_odds_ratio": -0.06425095349550247, "logits/chosen": 1.0914855003356934, "logits/rejected": 1.1397640705108643, "logps/chosen": -2.1334688663482666, "logps/rejected": -5.680695533752441, "loss": 0.6301, "nll_loss": 0.6236505508422852, "rewards/accuracies": 1.0, "rewards/chosen": -0.21334689855575562, "rewards/margins": 0.35472261905670166, "rewards/rejected": -0.5680695176124573, "step": 4569 }, { "epoch": 12.511978097193703, "grad_norm": 3.5087764263153076, "learning_rate": 3.73972602739726e-07, "log_odds_chosen": 3.856621503829956, "log_odds_ratio": -0.22671014070510864, "logits/chosen": 0.8731650114059448, "logits/rejected": 0.9002794623374939, "logps/chosen": -1.5515823364257812, "logps/rejected": -5.126773357391357, "loss": 0.5772, "nll_loss": 0.5545044541358948, "rewards/accuracies": 1.0, "rewards/chosen": -0.15515825152397156, "rewards/margins": 0.35751909017562866, "rewards/rejected": -0.5126773118972778, "step": 4570 }, { "epoch": 12.514715947980836, "grad_norm": 3.3415372371673584, "learning_rate": 3.7383561643835617e-07, "log_odds_chosen": 2.021465301513672, "log_odds_ratio": -0.21663235127925873, "logits/chosen": 0.9545117616653442, "logits/rejected": 0.9214721322059631, "logps/chosen": -2.2829604148864746, "logps/rejected": -4.074459075927734, "loss": 0.5357, "nll_loss": 0.5140633583068848, "rewards/accuracies": 1.0, "rewards/chosen": -0.22829604148864746, "rewards/margins": 0.1791498363018036, "rewards/rejected": -0.40744587779045105, "step": 4571 }, { "epoch": 12.517453798767967, "grad_norm": 5.850575923919678, "learning_rate": 3.7369863013698627e-07, "log_odds_chosen": 4.247139930725098, "log_odds_ratio": -0.4365243911743164, "logits/chosen": 1.1604628562927246, "logits/rejected": 1.1867696046829224, "logps/chosen": -2.8126118183135986, "logps/rejected": -7.040220737457275, "loss": 0.6408, "nll_loss": 0.5971606969833374, "rewards/accuracies": 0.75, "rewards/chosen": -0.2812611758708954, "rewards/margins": 0.42276090383529663, "rewards/rejected": -0.7040220499038696, "step": 4572 }, { "epoch": 12.5201916495551, "grad_norm": 4.231314182281494, "learning_rate": 3.735616438356164e-07, "log_odds_chosen": 3.9727017879486084, "log_odds_ratio": -0.08920250833034515, "logits/chosen": 1.200851559638977, "logits/rejected": 1.2407032251358032, "logps/chosen": -1.3948571681976318, "logps/rejected": -5.011510372161865, "loss": 0.5198, "nll_loss": 0.5109165906906128, "rewards/accuracies": 1.0, "rewards/chosen": -0.13948571681976318, "rewards/margins": 0.36166536808013916, "rewards/rejected": -0.5011510848999023, "step": 4573 }, { "epoch": 12.522929500342231, "grad_norm": 4.619289398193359, "learning_rate": 3.7342465753424657e-07, "log_odds_chosen": 1.8146055936813354, "log_odds_ratio": -0.3651646673679352, "logits/chosen": 1.2221453189849854, "logits/rejected": 1.226999282836914, "logps/chosen": -2.500187397003174, "logps/rejected": -4.190719127655029, "loss": 0.5553, "nll_loss": 0.5187995433807373, "rewards/accuracies": 0.875, "rewards/chosen": -0.25001874566078186, "rewards/margins": 0.16905318200588226, "rewards/rejected": -0.41907191276550293, "step": 4574 }, { "epoch": 12.525667351129364, "grad_norm": 3.26753568649292, "learning_rate": 3.7328767123287667e-07, "log_odds_chosen": 2.052713394165039, "log_odds_ratio": -0.16709044575691223, "logits/chosen": 0.8573631644248962, "logits/rejected": 0.8068233132362366, "logps/chosen": -1.8272398710250854, "logps/rejected": -3.6418042182922363, "loss": 0.4663, "nll_loss": 0.449626088142395, "rewards/accuracies": 1.0, "rewards/chosen": -0.1827239990234375, "rewards/margins": 0.18145643174648285, "rewards/rejected": -0.36418044567108154, "step": 4575 }, { "epoch": 12.528405201916495, "grad_norm": 6.919745922088623, "learning_rate": 3.731506849315068e-07, "log_odds_chosen": 2.9378390312194824, "log_odds_ratio": -0.5440938472747803, "logits/chosen": 1.1765263080596924, "logits/rejected": 1.1578917503356934, "logps/chosen": -2.524693012237549, "logps/rejected": -5.344508171081543, "loss": 0.6083, "nll_loss": 0.5538643598556519, "rewards/accuracies": 0.75, "rewards/chosen": -0.2524693012237549, "rewards/margins": 0.281981498003006, "rewards/rejected": -0.5344508290290833, "step": 4576 }, { "epoch": 12.531143052703628, "grad_norm": 3.567383050918579, "learning_rate": 3.73013698630137e-07, "log_odds_chosen": 2.1681578159332275, "log_odds_ratio": -0.17182546854019165, "logits/chosen": 0.9494839906692505, "logits/rejected": 0.8682715892791748, "logps/chosen": -1.5964336395263672, "logps/rejected": -3.5754003524780273, "loss": 0.5157, "nll_loss": 0.49848273396492004, "rewards/accuracies": 1.0, "rewards/chosen": -0.15964335203170776, "rewards/margins": 0.19789665937423706, "rewards/rejected": -0.3575400412082672, "step": 4577 }, { "epoch": 12.53388090349076, "grad_norm": 3.239396572113037, "learning_rate": 3.7287671232876713e-07, "log_odds_chosen": 1.982141137123108, "log_odds_ratio": -0.23712345957756042, "logits/chosen": 1.0429699420928955, "logits/rejected": 1.073237657546997, "logps/chosen": -1.743034839630127, "logps/rejected": -3.536525249481201, "loss": 0.5554, "nll_loss": 0.5316500663757324, "rewards/accuracies": 1.0, "rewards/chosen": -0.17430348694324493, "rewards/margins": 0.17934904992580414, "rewards/rejected": -0.3536525368690491, "step": 4578 }, { "epoch": 12.536618754277892, "grad_norm": 5.60822057723999, "learning_rate": 3.7273972602739723e-07, "log_odds_chosen": 2.0643439292907715, "log_odds_ratio": -0.2215462028980255, "logits/chosen": 1.1523046493530273, "logits/rejected": 1.1441693305969238, "logps/chosen": -2.3413338661193848, "logps/rejected": -4.303813457489014, "loss": 0.5383, "nll_loss": 0.5161229968070984, "rewards/accuracies": 1.0, "rewards/chosen": -0.23413339257240295, "rewards/margins": 0.19624797999858856, "rewards/rejected": -0.4303813576698303, "step": 4579 }, { "epoch": 12.539356605065024, "grad_norm": 3.575998544692993, "learning_rate": 3.726027397260274e-07, "log_odds_chosen": 2.8802268505096436, "log_odds_ratio": -0.17160844802856445, "logits/chosen": 1.1541410684585571, "logits/rejected": 1.1052576303482056, "logps/chosen": -1.813580870628357, "logps/rejected": -4.534472942352295, "loss": 0.6213, "nll_loss": 0.6041551828384399, "rewards/accuracies": 1.0, "rewards/chosen": -0.18135806918144226, "rewards/margins": 0.2720892131328583, "rewards/rejected": -0.45344728231430054, "step": 4580 }, { "epoch": 12.542094455852157, "grad_norm": 3.3508169651031494, "learning_rate": 3.7246575342465753e-07, "log_odds_chosen": 4.921809196472168, "log_odds_ratio": -0.08726248890161514, "logits/chosen": 0.8226800560951233, "logits/rejected": 0.7859682440757751, "logps/chosen": -1.9048571586608887, "logps/rejected": -6.578919887542725, "loss": 0.6386, "nll_loss": 0.6298344731330872, "rewards/accuracies": 1.0, "rewards/chosen": -0.19048573076725006, "rewards/margins": 0.4674062728881836, "rewards/rejected": -0.6578919887542725, "step": 4581 }, { "epoch": 12.544832306639288, "grad_norm": 3.5916807651519775, "learning_rate": 3.7232876712328763e-07, "log_odds_chosen": 2.5130653381347656, "log_odds_ratio": -0.18003720045089722, "logits/chosen": 1.1710501909255981, "logits/rejected": 1.2071878910064697, "logps/chosen": -2.0792813301086426, "logps/rejected": -4.4202775955200195, "loss": 0.544, "nll_loss": 0.5259479284286499, "rewards/accuracies": 0.875, "rewards/chosen": -0.2079281508922577, "rewards/margins": 0.2340996116399765, "rewards/rejected": -0.4420278072357178, "step": 4582 }, { "epoch": 12.54757015742642, "grad_norm": 4.12111759185791, "learning_rate": 3.721917808219178e-07, "log_odds_chosen": 2.5038490295410156, "log_odds_ratio": -0.20657022297382355, "logits/chosen": 1.0488718748092651, "logits/rejected": 1.057569980621338, "logps/chosen": -2.0619056224823, "logps/rejected": -4.406989574432373, "loss": 0.4877, "nll_loss": 0.46700865030288696, "rewards/accuracies": 1.0, "rewards/chosen": -0.2061905562877655, "rewards/margins": 0.23450841009616852, "rewards/rejected": -0.4406989514827728, "step": 4583 }, { "epoch": 12.550308008213552, "grad_norm": 3.7285616397857666, "learning_rate": 3.7205479452054794e-07, "log_odds_chosen": 2.4697840213775635, "log_odds_ratio": -0.24549803137779236, "logits/chosen": 0.8326019048690796, "logits/rejected": 0.8923470377922058, "logps/chosen": -1.9968383312225342, "logps/rejected": -4.3304219245910645, "loss": 0.57, "nll_loss": 0.545490026473999, "rewards/accuracies": 1.0, "rewards/chosen": -0.19968384504318237, "rewards/margins": 0.23335835337638855, "rewards/rejected": -0.43304216861724854, "step": 4584 }, { "epoch": 12.553045859000685, "grad_norm": 3.9480302333831787, "learning_rate": 3.719178082191781e-07, "log_odds_chosen": 1.726184368133545, "log_odds_ratio": -0.20317113399505615, "logits/chosen": 0.8811371326446533, "logits/rejected": 0.8190958499908447, "logps/chosen": -2.1826252937316895, "logps/rejected": -3.765960216522217, "loss": 0.578, "nll_loss": 0.5576478838920593, "rewards/accuracies": 1.0, "rewards/chosen": -0.21826253831386566, "rewards/margins": 0.1583334505558014, "rewards/rejected": -0.37659600377082825, "step": 4585 }, { "epoch": 12.555783709787816, "grad_norm": 3.505577564239502, "learning_rate": 3.717808219178082e-07, "log_odds_chosen": 3.1731550693511963, "log_odds_ratio": -0.14167365431785583, "logits/chosen": 0.8650150895118713, "logits/rejected": 0.852340579032898, "logps/chosen": -1.5535495281219482, "logps/rejected": -4.488580226898193, "loss": 0.4534, "nll_loss": 0.43925386667251587, "rewards/accuracies": 1.0, "rewards/chosen": -0.15535496175289154, "rewards/margins": 0.2935030460357666, "rewards/rejected": -0.44885802268981934, "step": 4586 }, { "epoch": 12.558521560574949, "grad_norm": 3.6916749477386475, "learning_rate": 3.7164383561643834e-07, "log_odds_chosen": 2.2813515663146973, "log_odds_ratio": -0.25520268082618713, "logits/chosen": 1.0528218746185303, "logits/rejected": 1.0660322904586792, "logps/chosen": -1.741589069366455, "logps/rejected": -3.8516547679901123, "loss": 0.54, "nll_loss": 0.51445472240448, "rewards/accuracies": 1.0, "rewards/chosen": -0.17415890097618103, "rewards/margins": 0.2110065519809723, "rewards/rejected": -0.3851654827594757, "step": 4587 }, { "epoch": 12.56125941136208, "grad_norm": 3.2818195819854736, "learning_rate": 3.715068493150685e-07, "log_odds_chosen": 2.5657284259796143, "log_odds_ratio": -0.1987258791923523, "logits/chosen": 1.1428872346878052, "logits/rejected": 1.0890967845916748, "logps/chosen": -1.9760122299194336, "logps/rejected": -4.356472015380859, "loss": 0.5886, "nll_loss": 0.5687373876571655, "rewards/accuracies": 1.0, "rewards/chosen": -0.19760122895240784, "rewards/margins": 0.23804599046707153, "rewards/rejected": -0.43564724922180176, "step": 4588 }, { "epoch": 12.563997262149213, "grad_norm": 3.500526189804077, "learning_rate": 3.713698630136986e-07, "log_odds_chosen": 2.1724045276641846, "log_odds_ratio": -0.22414030134677887, "logits/chosen": 1.2356491088867188, "logits/rejected": 1.19741690158844, "logps/chosen": -1.6396664381027222, "logps/rejected": -3.6293141841888428, "loss": 0.5592, "nll_loss": 0.5367664694786072, "rewards/accuracies": 1.0, "rewards/chosen": -0.16396664083003998, "rewards/margins": 0.19896477460861206, "rewards/rejected": -0.36293143033981323, "step": 4589 }, { "epoch": 12.566735112936344, "grad_norm": 5.019794940948486, "learning_rate": 3.7123287671232874e-07, "log_odds_chosen": 0.691072940826416, "log_odds_ratio": -0.4523894786834717, "logits/chosen": 1.2664402723312378, "logits/rejected": 1.2307538986206055, "logps/chosen": -1.5369932651519775, "logps/rejected": -2.070261001586914, "loss": 0.4721, "nll_loss": 0.426862895488739, "rewards/accuracies": 0.875, "rewards/chosen": -0.15369932353496552, "rewards/margins": 0.05332678556442261, "rewards/rejected": -0.20702612400054932, "step": 4590 }, { "epoch": 12.569472963723477, "grad_norm": 9.233530044555664, "learning_rate": 3.710958904109589e-07, "log_odds_chosen": 2.0915446281433105, "log_odds_ratio": -0.6205040812492371, "logits/chosen": 1.3645963668823242, "logits/rejected": 1.3152813911437988, "logps/chosen": -2.580399513244629, "logps/rejected": -4.51616907119751, "loss": 0.612, "nll_loss": 0.549909234046936, "rewards/accuracies": 0.75, "rewards/chosen": -0.2580399513244629, "rewards/margins": 0.19357696175575256, "rewards/rejected": -0.45161688327789307, "step": 4591 }, { "epoch": 12.572210814510608, "grad_norm": 3.7817654609680176, "learning_rate": 3.7095890410958905e-07, "log_odds_chosen": 2.6123485565185547, "log_odds_ratio": -0.12446561455726624, "logits/chosen": 1.1140239238739014, "logits/rejected": 1.1307554244995117, "logps/chosen": -2.317476272583008, "logps/rejected": -4.810305118560791, "loss": 0.5391, "nll_loss": 0.5266950130462646, "rewards/accuracies": 1.0, "rewards/chosen": -0.23174762725830078, "rewards/margins": 0.24928289651870728, "rewards/rejected": -0.48103049397468567, "step": 4592 }, { "epoch": 12.574948665297741, "grad_norm": 4.955281734466553, "learning_rate": 3.7082191780821914e-07, "log_odds_chosen": 3.635204792022705, "log_odds_ratio": -0.33723312616348267, "logits/chosen": 1.2752559185028076, "logits/rejected": 1.2234593629837036, "logps/chosen": -1.76481032371521, "logps/rejected": -5.209111213684082, "loss": 0.6109, "nll_loss": 0.5771811008453369, "rewards/accuracies": 0.75, "rewards/chosen": -0.17648103833198547, "rewards/margins": 0.3444300591945648, "rewards/rejected": -0.5209110975265503, "step": 4593 }, { "epoch": 12.577686516084874, "grad_norm": 5.255753993988037, "learning_rate": 3.706849315068493e-07, "log_odds_chosen": 2.430807590484619, "log_odds_ratio": -0.3286113440990448, "logits/chosen": 1.0132750272750854, "logits/rejected": 1.045788049697876, "logps/chosen": -1.9966330528259277, "logps/rejected": -4.247931957244873, "loss": 0.624, "nll_loss": 0.5911043882369995, "rewards/accuracies": 0.875, "rewards/chosen": -0.19966331124305725, "rewards/margins": 0.2251299023628235, "rewards/rejected": -0.42479318380355835, "step": 4594 }, { "epoch": 12.580424366872005, "grad_norm": 5.031625270843506, "learning_rate": 3.7054794520547945e-07, "log_odds_chosen": 2.9687693119049072, "log_odds_ratio": -0.21392323076725006, "logits/chosen": 0.8213194012641907, "logits/rejected": 0.7158130407333374, "logps/chosen": -1.8536996841430664, "logps/rejected": -4.644119739532471, "loss": 0.4924, "nll_loss": 0.47104552388191223, "rewards/accuracies": 1.0, "rewards/chosen": -0.18536998331546783, "rewards/margins": 0.27904200553894043, "rewards/rejected": -0.46441197395324707, "step": 4595 }, { "epoch": 12.583162217659137, "grad_norm": 5.644250392913818, "learning_rate": 3.7041095890410955e-07, "log_odds_chosen": 2.3692591190338135, "log_odds_ratio": -0.32294195890426636, "logits/chosen": 0.9633285999298096, "logits/rejected": 0.9347141981124878, "logps/chosen": -2.2016327381134033, "logps/rejected": -4.354821681976318, "loss": 0.6116, "nll_loss": 0.5792626142501831, "rewards/accuracies": 0.875, "rewards/chosen": -0.2201632708311081, "rewards/margins": 0.21531890332698822, "rewards/rejected": -0.4354821741580963, "step": 4596 }, { "epoch": 12.58590006844627, "grad_norm": 4.034007549285889, "learning_rate": 3.7027397260273975e-07, "log_odds_chosen": 1.5634138584136963, "log_odds_ratio": -0.272672176361084, "logits/chosen": 1.1999669075012207, "logits/rejected": 1.1252620220184326, "logps/chosen": -1.7370184659957886, "logps/rejected": -3.1504900455474854, "loss": 0.5165, "nll_loss": 0.48921406269073486, "rewards/accuracies": 0.875, "rewards/chosen": -0.17370185256004333, "rewards/margins": 0.14134716987609863, "rewards/rejected": -0.31504902243614197, "step": 4597 }, { "epoch": 12.588637919233403, "grad_norm": 4.279412746429443, "learning_rate": 3.7013698630136985e-07, "log_odds_chosen": 2.6329665184020996, "log_odds_ratio": -0.2348320484161377, "logits/chosen": 1.0258996486663818, "logits/rejected": 0.9700915217399597, "logps/chosen": -1.3880095481872559, "logps/rejected": -3.7614052295684814, "loss": 0.528, "nll_loss": 0.5044683218002319, "rewards/accuracies": 1.0, "rewards/chosen": -0.1388009786605835, "rewards/margins": 0.2373395562171936, "rewards/rejected": -0.3761405348777771, "step": 4598 }, { "epoch": 12.591375770020534, "grad_norm": 8.285273551940918, "learning_rate": 3.7e-07, "log_odds_chosen": 1.9329419136047363, "log_odds_ratio": -0.49810007214546204, "logits/chosen": 1.354851245880127, "logits/rejected": 1.3001841306686401, "logps/chosen": -2.248041868209839, "logps/rejected": -4.089476108551025, "loss": 0.5869, "nll_loss": 0.5371078848838806, "rewards/accuracies": 0.875, "rewards/chosen": -0.22480419278144836, "rewards/margins": 0.18414342403411865, "rewards/rejected": -0.40894758701324463, "step": 4599 }, { "epoch": 12.594113620807667, "grad_norm": 4.681517601013184, "learning_rate": 3.698630136986301e-07, "log_odds_chosen": 1.2356631755828857, "log_odds_ratio": -0.3923664689064026, "logits/chosen": 1.0521106719970703, "logits/rejected": 1.05097234249115, "logps/chosen": -2.3192107677459717, "logps/rejected": -3.4350502490997314, "loss": 0.5367, "nll_loss": 0.4974786043167114, "rewards/accuracies": 0.875, "rewards/chosen": -0.23192107677459717, "rewards/margins": 0.11158394068479538, "rewards/rejected": -0.34350502490997314, "step": 4600 }, { "epoch": 12.596851471594798, "grad_norm": 3.5990662574768066, "learning_rate": 3.6972602739726026e-07, "log_odds_chosen": 2.616499423980713, "log_odds_ratio": -0.16483011841773987, "logits/chosen": 1.4867757558822632, "logits/rejected": 1.4820454120635986, "logps/chosen": -2.197122812271118, "logps/rejected": -4.6792192459106445, "loss": 0.524, "nll_loss": 0.5075153708457947, "rewards/accuracies": 1.0, "rewards/chosen": -0.2197122871875763, "rewards/margins": 0.2482096552848816, "rewards/rejected": -0.4679219722747803, "step": 4601 }, { "epoch": 12.59958932238193, "grad_norm": 4.1356658935546875, "learning_rate": 3.695890410958904e-07, "log_odds_chosen": 3.369166135787964, "log_odds_ratio": -0.09482645988464355, "logits/chosen": 1.4235420227050781, "logits/rejected": 1.5036417245864868, "logps/chosen": -1.9143145084381104, "logps/rejected": -5.088479995727539, "loss": 0.5186, "nll_loss": 0.509135365486145, "rewards/accuracies": 1.0, "rewards/chosen": -0.19143146276474, "rewards/margins": 0.31741654872894287, "rewards/rejected": -0.5088479518890381, "step": 4602 }, { "epoch": 12.602327173169062, "grad_norm": 6.5854973793029785, "learning_rate": 3.694520547945205e-07, "log_odds_chosen": 1.1958245038986206, "log_odds_ratio": -0.6045804023742676, "logits/chosen": 0.6867121458053589, "logits/rejected": 0.6586609482765198, "logps/chosen": -2.1665287017822266, "logps/rejected": -3.244941473007202, "loss": 0.6568, "nll_loss": 0.5963749885559082, "rewards/accuracies": 0.875, "rewards/chosen": -0.21665285527706146, "rewards/margins": 0.10784129798412323, "rewards/rejected": -0.3244941830635071, "step": 4603 }, { "epoch": 12.605065023956195, "grad_norm": 3.537658929824829, "learning_rate": 3.693150684931507e-07, "log_odds_chosen": 2.822427749633789, "log_odds_ratio": -0.12056894600391388, "logits/chosen": 0.878836989402771, "logits/rejected": 0.8536267280578613, "logps/chosen": -2.1334967613220215, "logps/rejected": -4.779341220855713, "loss": 0.6138, "nll_loss": 0.601715087890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.21334967017173767, "rewards/margins": 0.264584481716156, "rewards/rejected": -0.4779341518878937, "step": 4604 }, { "epoch": 12.607802874743326, "grad_norm": 3.890054225921631, "learning_rate": 3.691780821917808e-07, "log_odds_chosen": 2.5970563888549805, "log_odds_ratio": -0.1855258047580719, "logits/chosen": 0.7636817097663879, "logits/rejected": 0.6759722828865051, "logps/chosen": -1.747626781463623, "logps/rejected": -4.123331069946289, "loss": 0.5099, "nll_loss": 0.49136483669281006, "rewards/accuracies": 1.0, "rewards/chosen": -0.17476268112659454, "rewards/margins": 0.23757044970989227, "rewards/rejected": -0.4123331308364868, "step": 4605 }, { "epoch": 12.61054072553046, "grad_norm": 4.477913856506348, "learning_rate": 3.690410958904109e-07, "log_odds_chosen": 2.8670241832733154, "log_odds_ratio": -0.17328782379627228, "logits/chosen": 1.1914929151535034, "logits/rejected": 1.1785821914672852, "logps/chosen": -2.6569271087646484, "logps/rejected": -5.418915748596191, "loss": 0.5995, "nll_loss": 0.5821566581726074, "rewards/accuracies": 1.0, "rewards/chosen": -0.26569271087646484, "rewards/margins": 0.2761988639831543, "rewards/rejected": -0.5418916344642639, "step": 4606 }, { "epoch": 12.61327857631759, "grad_norm": 5.266028881072998, "learning_rate": 3.6890410958904106e-07, "log_odds_chosen": 1.1729562282562256, "log_odds_ratio": -0.3872346878051758, "logits/chosen": 0.9138772487640381, "logits/rejected": 0.7815036177635193, "logps/chosen": -1.7479345798492432, "logps/rejected": -2.8018743991851807, "loss": 0.7203, "nll_loss": 0.6815420985221863, "rewards/accuracies": 0.875, "rewards/chosen": -0.17479345202445984, "rewards/margins": 0.10539399087429047, "rewards/rejected": -0.2801874577999115, "step": 4607 }, { "epoch": 12.616016427104723, "grad_norm": 3.0195913314819336, "learning_rate": 3.687671232876712e-07, "log_odds_chosen": 3.32198429107666, "log_odds_ratio": -0.15453973412513733, "logits/chosen": 0.9118235111236572, "logits/rejected": 0.7916113138198853, "logps/chosen": -1.822485327720642, "logps/rejected": -4.913877487182617, "loss": 0.5286, "nll_loss": 0.5131313800811768, "rewards/accuracies": 1.0, "rewards/chosen": -0.18224851787090302, "rewards/margins": 0.3091392517089844, "rewards/rejected": -0.4913877844810486, "step": 4608 }, { "epoch": 12.618754277891854, "grad_norm": 3.522976875305176, "learning_rate": 3.6863013698630137e-07, "log_odds_chosen": 4.567004680633545, "log_odds_ratio": -0.19536468386650085, "logits/chosen": 0.8833868503570557, "logits/rejected": 0.9048247337341309, "logps/chosen": -1.5359890460968018, "logps/rejected": -5.847562789916992, "loss": 0.5995, "nll_loss": 0.5799151062965393, "rewards/accuracies": 0.875, "rewards/chosen": -0.15359890460968018, "rewards/margins": 0.4311574101448059, "rewards/rejected": -0.5847563147544861, "step": 4609 }, { "epoch": 12.621492128678987, "grad_norm": 3.7984886169433594, "learning_rate": 3.6849315068493147e-07, "log_odds_chosen": 1.4903889894485474, "log_odds_ratio": -0.2993675768375397, "logits/chosen": 1.1471383571624756, "logits/rejected": 1.1175904273986816, "logps/chosen": -1.907781720161438, "logps/rejected": -3.2654671669006348, "loss": 0.5247, "nll_loss": 0.4947156012058258, "rewards/accuracies": 0.875, "rewards/chosen": -0.19077816605567932, "rewards/margins": 0.13576853275299072, "rewards/rejected": -0.32654669880867004, "step": 4610 }, { "epoch": 12.624229979466119, "grad_norm": 3.4222450256347656, "learning_rate": 3.6835616438356167e-07, "log_odds_chosen": 2.4504706859588623, "log_odds_ratio": -0.19972404837608337, "logits/chosen": 0.9032173752784729, "logits/rejected": 0.8677815794944763, "logps/chosen": -2.1366004943847656, "logps/rejected": -4.410900592803955, "loss": 0.5317, "nll_loss": 0.5117531418800354, "rewards/accuracies": 1.0, "rewards/chosen": -0.21366004645824432, "rewards/margins": 0.22743001580238342, "rewards/rejected": -0.44109010696411133, "step": 4611 }, { "epoch": 12.626967830253252, "grad_norm": 3.697322368621826, "learning_rate": 3.6821917808219177e-07, "log_odds_chosen": 3.332850456237793, "log_odds_ratio": -0.15322446823120117, "logits/chosen": 1.2294697761535645, "logits/rejected": 1.2873663902282715, "logps/chosen": -1.9745104312896729, "logps/rejected": -5.149078369140625, "loss": 0.5477, "nll_loss": 0.5324084758758545, "rewards/accuracies": 0.875, "rewards/chosen": -0.19745105504989624, "rewards/margins": 0.31745678186416626, "rewards/rejected": -0.5149078369140625, "step": 4612 }, { "epoch": 12.629705681040383, "grad_norm": 3.8907523155212402, "learning_rate": 3.6808219178082187e-07, "log_odds_chosen": 2.4594662189483643, "log_odds_ratio": -0.2054375559091568, "logits/chosen": 1.064408302307129, "logits/rejected": 1.1063374280929565, "logps/chosen": -2.0983986854553223, "logps/rejected": -4.439957141876221, "loss": 0.6327, "nll_loss": 0.6121307611465454, "rewards/accuracies": 0.875, "rewards/chosen": -0.2098398655653, "rewards/margins": 0.23415586352348328, "rewards/rejected": -0.44399574398994446, "step": 4613 }, { "epoch": 12.632443531827516, "grad_norm": 4.086843013763428, "learning_rate": 3.67945205479452e-07, "log_odds_chosen": 1.6262359619140625, "log_odds_ratio": -0.32994356751441956, "logits/chosen": 1.0551888942718506, "logits/rejected": 1.0115129947662354, "logps/chosen": -2.191713809967041, "logps/rejected": -3.6800341606140137, "loss": 0.5818, "nll_loss": 0.5488498210906982, "rewards/accuracies": 0.875, "rewards/chosen": -0.219171404838562, "rewards/margins": 0.1488320529460907, "rewards/rejected": -0.3680034279823303, "step": 4614 }, { "epoch": 12.635181382614647, "grad_norm": 3.3572378158569336, "learning_rate": 3.6780821917808217e-07, "log_odds_chosen": 3.5012612342834473, "log_odds_ratio": -0.1269436776638031, "logits/chosen": 1.1287314891815186, "logits/rejected": 1.188300371170044, "logps/chosen": -1.6597011089324951, "logps/rejected": -4.920324325561523, "loss": 0.5279, "nll_loss": 0.5152336955070496, "rewards/accuracies": 1.0, "rewards/chosen": -0.16597013175487518, "rewards/margins": 0.32606229186058044, "rewards/rejected": -0.4920324385166168, "step": 4615 }, { "epoch": 12.63791923340178, "grad_norm": 3.5097708702087402, "learning_rate": 3.676712328767123e-07, "log_odds_chosen": 3.438886880874634, "log_odds_ratio": -0.12062611430883408, "logits/chosen": 1.1899069547653198, "logits/rejected": 1.2603247165679932, "logps/chosen": -2.1016907691955566, "logps/rejected": -5.3067731857299805, "loss": 0.5983, "nll_loss": 0.5862410664558411, "rewards/accuracies": 1.0, "rewards/chosen": -0.21016907691955566, "rewards/margins": 0.3205082416534424, "rewards/rejected": -0.530677318572998, "step": 4616 }, { "epoch": 12.640657084188911, "grad_norm": 3.4378700256347656, "learning_rate": 3.675342465753424e-07, "log_odds_chosen": 4.854825019836426, "log_odds_ratio": -0.11440646648406982, "logits/chosen": 1.0608960390090942, "logits/rejected": 1.0253499746322632, "logps/chosen": -2.327127456665039, "logps/rejected": -6.997922420501709, "loss": 0.7039, "nll_loss": 0.6924853324890137, "rewards/accuracies": 1.0, "rewards/chosen": -0.2327127456665039, "rewards/margins": 0.4670794904232025, "rewards/rejected": -0.6997922658920288, "step": 4617 }, { "epoch": 12.643394934976044, "grad_norm": 3.613722324371338, "learning_rate": 3.6739726027397263e-07, "log_odds_chosen": 2.787893295288086, "log_odds_ratio": -0.12186989188194275, "logits/chosen": 1.2939600944519043, "logits/rejected": 1.3472788333892822, "logps/chosen": -2.0501747131347656, "logps/rejected": -4.7192230224609375, "loss": 0.5305, "nll_loss": 0.5183554887771606, "rewards/accuracies": 1.0, "rewards/chosen": -0.20501749217510223, "rewards/margins": 0.2669048309326172, "rewards/rejected": -0.4719223380088806, "step": 4618 }, { "epoch": 12.646132785763175, "grad_norm": 3.924023151397705, "learning_rate": 3.6726027397260273e-07, "log_odds_chosen": 2.976382255554199, "log_odds_ratio": -0.18905244767665863, "logits/chosen": 0.7267993688583374, "logits/rejected": 0.7272120714187622, "logps/chosen": -2.8740158081054688, "logps/rejected": -5.727766036987305, "loss": 0.7273, "nll_loss": 0.7083624601364136, "rewards/accuracies": 0.875, "rewards/chosen": -0.28740161657333374, "rewards/margins": 0.2853749990463257, "rewards/rejected": -0.5727765560150146, "step": 4619 }, { "epoch": 12.648870636550308, "grad_norm": 4.322771072387695, "learning_rate": 3.6712328767123283e-07, "log_odds_chosen": 1.9221079349517822, "log_odds_ratio": -0.34329134225845337, "logits/chosen": 1.0089436769485474, "logits/rejected": 1.008718729019165, "logps/chosen": -3.4392294883728027, "logps/rejected": -5.32119607925415, "loss": 0.8473, "nll_loss": 0.8129453063011169, "rewards/accuracies": 0.875, "rewards/chosen": -0.3439229726791382, "rewards/margins": 0.18819665908813477, "rewards/rejected": -0.532119631767273, "step": 4620 }, { "epoch": 12.651608487337441, "grad_norm": 8.527912139892578, "learning_rate": 3.66986301369863e-07, "log_odds_chosen": 1.0643982887268066, "log_odds_ratio": -0.6165008544921875, "logits/chosen": 1.2118513584136963, "logits/rejected": 1.1857142448425293, "logps/chosen": -2.7051947116851807, "logps/rejected": -3.645399332046509, "loss": 0.8001, "nll_loss": 0.738471508026123, "rewards/accuracies": 0.75, "rewards/chosen": -0.2705194652080536, "rewards/margins": 0.09402048587799072, "rewards/rejected": -0.3645399808883667, "step": 4621 }, { "epoch": 12.654346338124572, "grad_norm": 3.6023528575897217, "learning_rate": 3.6684931506849313e-07, "log_odds_chosen": 3.0736591815948486, "log_odds_ratio": -0.29748761653900146, "logits/chosen": 1.0398370027542114, "logits/rejected": 1.0814911127090454, "logps/chosen": -2.00376296043396, "logps/rejected": -4.885366916656494, "loss": 0.5952, "nll_loss": 0.5654245615005493, "rewards/accuracies": 0.875, "rewards/chosen": -0.20037630200386047, "rewards/margins": 0.28816038370132446, "rewards/rejected": -0.48853665590286255, "step": 4622 }, { "epoch": 12.657084188911703, "grad_norm": 5.622645854949951, "learning_rate": 3.667123287671233e-07, "log_odds_chosen": 2.7580325603485107, "log_odds_ratio": -0.34828078746795654, "logits/chosen": 1.0415014028549194, "logits/rejected": 1.068263053894043, "logps/chosen": -2.3145933151245117, "logps/rejected": -4.988283157348633, "loss": 0.7713, "nll_loss": 0.7364251613616943, "rewards/accuracies": 0.875, "rewards/chosen": -0.23145931959152222, "rewards/margins": 0.26736900210380554, "rewards/rejected": -0.49882835149765015, "step": 4623 }, { "epoch": 12.659822039698836, "grad_norm": 3.2555594444274902, "learning_rate": 3.665753424657534e-07, "log_odds_chosen": 3.9705145359039307, "log_odds_ratio": -0.14383387565612793, "logits/chosen": 1.2800970077514648, "logits/rejected": 1.297615647315979, "logps/chosen": -2.2536044120788574, "logps/rejected": -6.082888603210449, "loss": 0.5839, "nll_loss": 0.5695041418075562, "rewards/accuracies": 1.0, "rewards/chosen": -0.2253604382276535, "rewards/margins": 0.3829284906387329, "rewards/rejected": -0.6082890033721924, "step": 4624 }, { "epoch": 12.66255989048597, "grad_norm": 4.30277156829834, "learning_rate": 3.664383561643836e-07, "log_odds_chosen": 1.9888118505477905, "log_odds_ratio": -0.22078341245651245, "logits/chosen": 0.8422781825065613, "logits/rejected": 0.7806104421615601, "logps/chosen": -1.6869332790374756, "logps/rejected": -3.4866843223571777, "loss": 0.4756, "nll_loss": 0.45355960726737976, "rewards/accuracies": 1.0, "rewards/chosen": -0.16869333386421204, "rewards/margins": 0.17997512221336365, "rewards/rejected": -0.3486684560775757, "step": 4625 }, { "epoch": 12.6652977412731, "grad_norm": 3.633711576461792, "learning_rate": 3.663013698630137e-07, "log_odds_chosen": 2.8158254623413086, "log_odds_ratio": -0.25596868991851807, "logits/chosen": 1.320946216583252, "logits/rejected": 1.315804123878479, "logps/chosen": -1.8267242908477783, "logps/rejected": -4.456287384033203, "loss": 0.545, "nll_loss": 0.5193578600883484, "rewards/accuracies": 1.0, "rewards/chosen": -0.1826724261045456, "rewards/margins": 0.26295632123947144, "rewards/rejected": -0.4456287622451782, "step": 4626 }, { "epoch": 12.668035592060233, "grad_norm": 4.035964012145996, "learning_rate": 3.661643835616438e-07, "log_odds_chosen": 2.973301410675049, "log_odds_ratio": -0.11714331060647964, "logits/chosen": 1.1068214178085327, "logits/rejected": 1.153786301612854, "logps/chosen": -2.228311061859131, "logps/rejected": -5.073612213134766, "loss": 0.6848, "nll_loss": 0.6730425953865051, "rewards/accuracies": 1.0, "rewards/chosen": -0.222831130027771, "rewards/margins": 0.2845301628112793, "rewards/rejected": -0.5073612928390503, "step": 4627 }, { "epoch": 12.670773442847365, "grad_norm": 3.809553861618042, "learning_rate": 3.6602739726027394e-07, "log_odds_chosen": 2.6393637657165527, "log_odds_ratio": -0.1747257113456726, "logits/chosen": 0.9540450572967529, "logits/rejected": 0.9568617343902588, "logps/chosen": -1.5864694118499756, "logps/rejected": -4.007883548736572, "loss": 0.5122, "nll_loss": 0.4947027564048767, "rewards/accuracies": 1.0, "rewards/chosen": -0.15864694118499756, "rewards/margins": 0.24214141070842743, "rewards/rejected": -0.4007883667945862, "step": 4628 }, { "epoch": 12.673511293634498, "grad_norm": 3.2201082706451416, "learning_rate": 3.658904109589041e-07, "log_odds_chosen": 3.0066065788269043, "log_odds_ratio": -0.13489413261413574, "logits/chosen": 0.9034991264343262, "logits/rejected": 0.9202370047569275, "logps/chosen": -1.9985853433609009, "logps/rejected": -4.861106872558594, "loss": 0.6604, "nll_loss": 0.6468924283981323, "rewards/accuracies": 1.0, "rewards/chosen": -0.19985854625701904, "rewards/margins": 0.2862521708011627, "rewards/rejected": -0.4861106872558594, "step": 4629 }, { "epoch": 12.676249144421629, "grad_norm": 4.084874629974365, "learning_rate": 3.6575342465753424e-07, "log_odds_chosen": 1.204056739807129, "log_odds_ratio": -0.370374858379364, "logits/chosen": 0.8864613771438599, "logits/rejected": 0.8428106307983398, "logps/chosen": -2.703361749649048, "logps/rejected": -3.7593793869018555, "loss": 0.5283, "nll_loss": 0.49130702018737793, "rewards/accuracies": 1.0, "rewards/chosen": -0.27033618092536926, "rewards/margins": 0.10560175776481628, "rewards/rejected": -0.37593793869018555, "step": 4630 }, { "epoch": 12.678986995208762, "grad_norm": 4.014964580535889, "learning_rate": 3.6561643835616434e-07, "log_odds_chosen": 3.052830696105957, "log_odds_ratio": -0.20800134539604187, "logits/chosen": 0.8808274269104004, "logits/rejected": 0.8985470533370972, "logps/chosen": -1.7857482433319092, "logps/rejected": -4.589988708496094, "loss": 0.5098, "nll_loss": 0.4889959692955017, "rewards/accuracies": 0.875, "rewards/chosen": -0.1785748302936554, "rewards/margins": 0.2804240882396698, "rewards/rejected": -0.4589989185333252, "step": 4631 }, { "epoch": 12.681724845995893, "grad_norm": 3.1482200622558594, "learning_rate": 3.6547945205479455e-07, "log_odds_chosen": 3.115913152694702, "log_odds_ratio": -0.14295294880867004, "logits/chosen": 1.0307016372680664, "logits/rejected": 1.02327561378479, "logps/chosen": -1.7902264595031738, "logps/rejected": -4.744889259338379, "loss": 0.5465, "nll_loss": 0.5322235226631165, "rewards/accuracies": 1.0, "rewards/chosen": -0.1790226399898529, "rewards/margins": 0.29546627402305603, "rewards/rejected": -0.47448891401290894, "step": 4632 }, { "epoch": 12.684462696783026, "grad_norm": 6.322404384613037, "learning_rate": 3.6534246575342465e-07, "log_odds_chosen": 2.2389190196990967, "log_odds_ratio": -0.5326497554779053, "logits/chosen": 1.3307101726531982, "logits/rejected": 1.3374789953231812, "logps/chosen": -2.982462167739868, "logps/rejected": -5.155725479125977, "loss": 0.5886, "nll_loss": 0.5353763699531555, "rewards/accuracies": 0.875, "rewards/chosen": -0.29824620485305786, "rewards/margins": 0.2173263132572174, "rewards/rejected": -0.5155725479125977, "step": 4633 }, { "epoch": 12.687200547570157, "grad_norm": 3.9054672718048096, "learning_rate": 3.6520547945205474e-07, "log_odds_chosen": 2.414785146713257, "log_odds_ratio": -0.6209120154380798, "logits/chosen": 1.029728889465332, "logits/rejected": 1.1127347946166992, "logps/chosen": -2.0120770931243896, "logps/rejected": -4.295844078063965, "loss": 0.5018, "nll_loss": 0.43968355655670166, "rewards/accuracies": 0.875, "rewards/chosen": -0.2012077122926712, "rewards/margins": 0.22837668657302856, "rewards/rejected": -0.4295843839645386, "step": 4634 }, { "epoch": 12.68993839835729, "grad_norm": 4.400863170623779, "learning_rate": 3.6506849315068495e-07, "log_odds_chosen": 2.947425365447998, "log_odds_ratio": -0.2863004207611084, "logits/chosen": 1.0642526149749756, "logits/rejected": 1.0821490287780762, "logps/chosen": -2.5111773014068604, "logps/rejected": -5.343869209289551, "loss": 0.7431, "nll_loss": 0.7145180702209473, "rewards/accuracies": 0.875, "rewards/chosen": -0.2511177361011505, "rewards/margins": 0.2832691967487335, "rewards/rejected": -0.534386932849884, "step": 4635 }, { "epoch": 12.692676249144421, "grad_norm": 3.19315767288208, "learning_rate": 3.6493150684931505e-07, "log_odds_chosen": 5.583465576171875, "log_odds_ratio": -0.023571603000164032, "logits/chosen": 1.422142505645752, "logits/rejected": 1.4566144943237305, "logps/chosen": -1.9955933094024658, "logps/rejected": -7.227876663208008, "loss": 0.5081, "nll_loss": 0.5057216882705688, "rewards/accuracies": 1.0, "rewards/chosen": -0.19955933094024658, "rewards/margins": 0.5232283473014832, "rewards/rejected": -0.722787618637085, "step": 4636 }, { "epoch": 12.695414099931554, "grad_norm": 3.4594955444335938, "learning_rate": 3.647945205479452e-07, "log_odds_chosen": 3.3817806243896484, "log_odds_ratio": -0.2670345604419708, "logits/chosen": 0.820406436920166, "logits/rejected": 0.795842170715332, "logps/chosen": -2.084852457046509, "logps/rejected": -5.33414888381958, "loss": 0.656, "nll_loss": 0.6292479634284973, "rewards/accuracies": 0.875, "rewards/chosen": -0.20848526060581207, "rewards/margins": 0.3249296247959137, "rewards/rejected": -0.5334148406982422, "step": 4637 }, { "epoch": 12.698151950718685, "grad_norm": 3.540398120880127, "learning_rate": 3.646575342465753e-07, "log_odds_chosen": 2.803429126739502, "log_odds_ratio": -0.195823073387146, "logits/chosen": 1.2946186065673828, "logits/rejected": 1.334082007408142, "logps/chosen": -1.6684750318527222, "logps/rejected": -4.168939590454102, "loss": 0.465, "nll_loss": 0.44540271162986755, "rewards/accuracies": 1.0, "rewards/chosen": -0.16684748232364655, "rewards/margins": 0.2500464618206024, "rewards/rejected": -0.41689395904541016, "step": 4638 }, { "epoch": 12.700889801505818, "grad_norm": 3.786989688873291, "learning_rate": 3.645205479452055e-07, "log_odds_chosen": 2.3320388793945312, "log_odds_ratio": -0.20678266882896423, "logits/chosen": 0.7580073475837708, "logits/rejected": 0.7085491418838501, "logps/chosen": -1.7458243370056152, "logps/rejected": -3.92714524269104, "loss": 0.5098, "nll_loss": 0.4891088604927063, "rewards/accuracies": 1.0, "rewards/chosen": -0.17458242177963257, "rewards/margins": 0.21813207864761353, "rewards/rejected": -0.3927145004272461, "step": 4639 }, { "epoch": 12.70362765229295, "grad_norm": 3.213838815689087, "learning_rate": 3.643835616438356e-07, "log_odds_chosen": 3.642017364501953, "log_odds_ratio": -0.15256816148757935, "logits/chosen": 1.1490800380706787, "logits/rejected": 1.0815259218215942, "logps/chosen": -1.8331626653671265, "logps/rejected": -5.276950836181641, "loss": 0.5855, "nll_loss": 0.5702892541885376, "rewards/accuracies": 1.0, "rewards/chosen": -0.18331626057624817, "rewards/margins": 0.34437882900238037, "rewards/rejected": -0.5276951193809509, "step": 4640 }, { "epoch": 12.706365503080082, "grad_norm": 3.4972589015960693, "learning_rate": 3.642465753424657e-07, "log_odds_chosen": 2.1502633094787598, "log_odds_ratio": -0.18661841750144958, "logits/chosen": 0.9902899265289307, "logits/rejected": 1.0099077224731445, "logps/chosen": -1.8227767944335938, "logps/rejected": -3.800161361694336, "loss": 0.4731, "nll_loss": 0.45444583892822266, "rewards/accuracies": 1.0, "rewards/chosen": -0.18227767944335938, "rewards/margins": 0.19773845374584198, "rewards/rejected": -0.38001614809036255, "step": 4641 }, { "epoch": 12.709103353867214, "grad_norm": 3.129289150238037, "learning_rate": 3.641095890410959e-07, "log_odds_chosen": 4.193573951721191, "log_odds_ratio": -0.063526451587677, "logits/chosen": 1.1090556383132935, "logits/rejected": 1.1651477813720703, "logps/chosen": -1.3951407670974731, "logps/rejected": -5.254611492156982, "loss": 0.6189, "nll_loss": 0.612536609172821, "rewards/accuracies": 1.0, "rewards/chosen": -0.13951408863067627, "rewards/margins": 0.38594701886177063, "rewards/rejected": -0.5254610776901245, "step": 4642 }, { "epoch": 12.711841204654347, "grad_norm": 5.128759384155273, "learning_rate": 3.63972602739726e-07, "log_odds_chosen": 3.530092716217041, "log_odds_ratio": -0.3169412612915039, "logits/chosen": 1.1731592416763306, "logits/rejected": 1.1926764249801636, "logps/chosen": -3.4280507564544678, "logps/rejected": -6.857545852661133, "loss": 0.605, "nll_loss": 0.5732759833335876, "rewards/accuracies": 0.875, "rewards/chosen": -0.34280505776405334, "rewards/margins": 0.3429495394229889, "rewards/rejected": -0.685754656791687, "step": 4643 }, { "epoch": 12.714579055441478, "grad_norm": 3.6778736114501953, "learning_rate": 3.6383561643835616e-07, "log_odds_chosen": 3.579226493835449, "log_odds_ratio": -0.17644129693508148, "logits/chosen": 1.0405044555664062, "logits/rejected": 1.0960149765014648, "logps/chosen": -2.0653295516967773, "logps/rejected": -5.511507034301758, "loss": 0.6432, "nll_loss": 0.6255946755409241, "rewards/accuracies": 1.0, "rewards/chosen": -0.20653295516967773, "rewards/margins": 0.34461772441864014, "rewards/rejected": -0.5511506795883179, "step": 4644 }, { "epoch": 12.71731690622861, "grad_norm": 4.611130714416504, "learning_rate": 3.6369863013698626e-07, "log_odds_chosen": 2.0991291999816895, "log_odds_ratio": -0.3096424341201782, "logits/chosen": 1.3948577642440796, "logits/rejected": 1.418689250946045, "logps/chosen": -2.661874532699585, "logps/rejected": -4.6335906982421875, "loss": 0.5944, "nll_loss": 0.5633924007415771, "rewards/accuracies": 0.875, "rewards/chosen": -0.2661874294281006, "rewards/margins": 0.1971716284751892, "rewards/rejected": -0.4633590877056122, "step": 4645 }, { "epoch": 12.720054757015742, "grad_norm": 3.820127487182617, "learning_rate": 3.6356164383561646e-07, "log_odds_chosen": 2.7436683177948, "log_odds_ratio": -0.18491250276565552, "logits/chosen": 0.912213921546936, "logits/rejected": 0.8524643778800964, "logps/chosen": -2.541800022125244, "logps/rejected": -5.145867347717285, "loss": 0.5297, "nll_loss": 0.5112018585205078, "rewards/accuracies": 1.0, "rewards/chosen": -0.25418001413345337, "rewards/margins": 0.2604067325592041, "rewards/rejected": -0.5145867466926575, "step": 4646 }, { "epoch": 12.722792607802875, "grad_norm": 5.131227493286133, "learning_rate": 3.6342465753424656e-07, "log_odds_chosen": 2.628359317779541, "log_odds_ratio": -0.19237148761749268, "logits/chosen": 1.2370821237564087, "logits/rejected": 1.2623823881149292, "logps/chosen": -2.4593119621276855, "logps/rejected": -4.934811592102051, "loss": 0.5612, "nll_loss": 0.541972279548645, "rewards/accuracies": 1.0, "rewards/chosen": -0.2459312230348587, "rewards/margins": 0.24754998087882996, "rewards/rejected": -0.49348121881484985, "step": 4647 }, { "epoch": 12.725530458590008, "grad_norm": 3.228325366973877, "learning_rate": 3.6328767123287666e-07, "log_odds_chosen": 2.694505214691162, "log_odds_ratio": -0.19259853661060333, "logits/chosen": 1.0105255842208862, "logits/rejected": 0.9212015867233276, "logps/chosen": -1.9096208810806274, "logps/rejected": -4.391973495483398, "loss": 0.6099, "nll_loss": 0.5906065106391907, "rewards/accuracies": 1.0, "rewards/chosen": -0.19096209108829498, "rewards/margins": 0.24823525547981262, "rewards/rejected": -0.4391973614692688, "step": 4648 }, { "epoch": 12.728268309377139, "grad_norm": 5.291393756866455, "learning_rate": 3.6315068493150687e-07, "log_odds_chosen": 1.6698790788650513, "log_odds_ratio": -0.33027857542037964, "logits/chosen": 1.2916252613067627, "logits/rejected": 1.2563655376434326, "logps/chosen": -1.478144884109497, "logps/rejected": -2.927410125732422, "loss": 0.5835, "nll_loss": 0.5504732728004456, "rewards/accuracies": 1.0, "rewards/chosen": -0.14781449735164642, "rewards/margins": 0.144926518201828, "rewards/rejected": -0.29274100065231323, "step": 4649 }, { "epoch": 12.73100616016427, "grad_norm": 3.4066264629364014, "learning_rate": 3.6301369863013697e-07, "log_odds_chosen": 3.299530267715454, "log_odds_ratio": -0.13430500030517578, "logits/chosen": 0.8902420997619629, "logits/rejected": 0.8724050521850586, "logps/chosen": -1.719570279121399, "logps/rejected": -4.818498611450195, "loss": 0.4829, "nll_loss": 0.46946653723716736, "rewards/accuracies": 1.0, "rewards/chosen": -0.17195703089237213, "rewards/margins": 0.30989277362823486, "rewards/rejected": -0.4818498492240906, "step": 4650 }, { "epoch": 12.733744010951403, "grad_norm": 4.121438026428223, "learning_rate": 3.628767123287671e-07, "log_odds_chosen": 1.514643907546997, "log_odds_ratio": -0.36415696144104004, "logits/chosen": 1.0236823558807373, "logits/rejected": 0.9524677395820618, "logps/chosen": -2.059072732925415, "logps/rejected": -3.438283920288086, "loss": 0.5745, "nll_loss": 0.5380760431289673, "rewards/accuracies": 0.875, "rewards/chosen": -0.20590725541114807, "rewards/margins": 0.13792115449905396, "rewards/rejected": -0.3438284397125244, "step": 4651 }, { "epoch": 12.736481861738536, "grad_norm": 3.4948158264160156, "learning_rate": 3.627397260273972e-07, "log_odds_chosen": 3.575709581375122, "log_odds_ratio": -0.14344145357608795, "logits/chosen": 1.0030630826950073, "logits/rejected": 0.9479695558547974, "logps/chosen": -1.557079553604126, "logps/rejected": -4.942193508148193, "loss": 0.5359, "nll_loss": 0.5215279459953308, "rewards/accuracies": 1.0, "rewards/chosen": -0.1557079404592514, "rewards/margins": 0.3385113775730133, "rewards/rejected": -0.4942193329334259, "step": 4652 }, { "epoch": 12.739219712525667, "grad_norm": 3.4201323986053467, "learning_rate": 3.6260273972602737e-07, "log_odds_chosen": 2.370481491088867, "log_odds_ratio": -0.269538551568985, "logits/chosen": 1.174277424812317, "logits/rejected": 1.170243740081787, "logps/chosen": -1.8307592868804932, "logps/rejected": -4.081760406494141, "loss": 0.5642, "nll_loss": 0.5372604727745056, "rewards/accuracies": 0.875, "rewards/chosen": -0.1830759346485138, "rewards/margins": 0.2251001000404358, "rewards/rejected": -0.4081760048866272, "step": 4653 }, { "epoch": 12.7419575633128, "grad_norm": 3.0974557399749756, "learning_rate": 3.624657534246575e-07, "log_odds_chosen": 5.244103908538818, "log_odds_ratio": -0.043115485459566116, "logits/chosen": 1.2662944793701172, "logits/rejected": 1.337558388710022, "logps/chosen": -1.4821958541870117, "logps/rejected": -6.44287109375, "loss": 0.5397, "nll_loss": 0.5354242324829102, "rewards/accuracies": 1.0, "rewards/chosen": -0.14821958541870117, "rewards/margins": 0.49606749415397644, "rewards/rejected": -0.6442870497703552, "step": 4654 }, { "epoch": 12.744695414099931, "grad_norm": 3.9674582481384277, "learning_rate": 3.623287671232876e-07, "log_odds_chosen": 2.437350034713745, "log_odds_ratio": -0.3820894956588745, "logits/chosen": 1.0463008880615234, "logits/rejected": 1.0877161026000977, "logps/chosen": -1.8175917863845825, "logps/rejected": -4.1232075691223145, "loss": 0.5787, "nll_loss": 0.5404787659645081, "rewards/accuracies": 0.75, "rewards/chosen": -0.18175917863845825, "rewards/margins": 0.23056159913539886, "rewards/rejected": -0.4123207926750183, "step": 4655 }, { "epoch": 12.747433264887064, "grad_norm": 3.51481032371521, "learning_rate": 3.621917808219178e-07, "log_odds_chosen": 5.091853618621826, "log_odds_ratio": -0.11882741749286652, "logits/chosen": 0.9365484714508057, "logits/rejected": 0.9264001846313477, "logps/chosen": -1.7336397171020508, "logps/rejected": -6.572198390960693, "loss": 0.5873, "nll_loss": 0.5754555463790894, "rewards/accuracies": 0.875, "rewards/chosen": -0.17336396872997284, "rewards/margins": 0.4838559031486511, "rewards/rejected": -0.6572198271751404, "step": 4656 }, { "epoch": 12.750171115674195, "grad_norm": 3.618553400039673, "learning_rate": 3.620547945205479e-07, "log_odds_chosen": 3.4803667068481445, "log_odds_ratio": -0.33174991607666016, "logits/chosen": 1.2023247480392456, "logits/rejected": 1.2300746440887451, "logps/chosen": -1.564568042755127, "logps/rejected": -4.573864936828613, "loss": 0.5216, "nll_loss": 0.48845213651657104, "rewards/accuracies": 0.875, "rewards/chosen": -0.1564568132162094, "rewards/margins": 0.3009296655654907, "rewards/rejected": -0.45738649368286133, "step": 4657 }, { "epoch": 12.752908966461328, "grad_norm": 8.38133430480957, "learning_rate": 3.619178082191781e-07, "log_odds_chosen": 5.0313496589660645, "log_odds_ratio": -0.17941482365131378, "logits/chosen": 1.083425521850586, "logits/rejected": 1.0581351518630981, "logps/chosen": -2.0211682319641113, "logps/rejected": -6.912087917327881, "loss": 0.7141, "nll_loss": 0.6961884498596191, "rewards/accuracies": 1.0, "rewards/chosen": -0.20211684703826904, "rewards/margins": 0.48909199237823486, "rewards/rejected": -0.6912088394165039, "step": 4658 }, { "epoch": 12.75564681724846, "grad_norm": 3.4794843196868896, "learning_rate": 3.617808219178082e-07, "log_odds_chosen": 2.550579071044922, "log_odds_ratio": -0.21688830852508545, "logits/chosen": 1.077014446258545, "logits/rejected": 0.9619475603103638, "logps/chosen": -1.5725892782211304, "logps/rejected": -3.9031929969787598, "loss": 0.5354, "nll_loss": 0.5136627554893494, "rewards/accuracies": 1.0, "rewards/chosen": -0.15725894272327423, "rewards/margins": 0.2330603450536728, "rewards/rejected": -0.390319287776947, "step": 4659 }, { "epoch": 12.758384668035593, "grad_norm": 3.583611011505127, "learning_rate": 3.6164383561643833e-07, "log_odds_chosen": 3.2188210487365723, "log_odds_ratio": -0.1499108076095581, "logits/chosen": 0.9214286804199219, "logits/rejected": 0.9566612243652344, "logps/chosen": -2.298891067504883, "logps/rejected": -5.305376052856445, "loss": 0.7119, "nll_loss": 0.6969471573829651, "rewards/accuracies": 1.0, "rewards/chosen": -0.22988909482955933, "rewards/margins": 0.30064845085144043, "rewards/rejected": -0.5305375456809998, "step": 4660 }, { "epoch": 12.761122518822724, "grad_norm": 10.879369735717773, "learning_rate": 3.615068493150685e-07, "log_odds_chosen": 2.7535243034362793, "log_odds_ratio": -0.4250113368034363, "logits/chosen": 0.9941101670265198, "logits/rejected": 0.8731895089149475, "logps/chosen": -2.5205211639404297, "logps/rejected": -5.0773773193359375, "loss": 0.7034, "nll_loss": 0.6608627438545227, "rewards/accuracies": 0.875, "rewards/chosen": -0.2520521283149719, "rewards/margins": 0.25568559765815735, "rewards/rejected": -0.5077377557754517, "step": 4661 }, { "epoch": 12.763860369609857, "grad_norm": 4.174530029296875, "learning_rate": 3.613698630136986e-07, "log_odds_chosen": 1.6791563034057617, "log_odds_ratio": -0.3375513255596161, "logits/chosen": 1.230725884437561, "logits/rejected": 1.245294213294983, "logps/chosen": -1.5678343772888184, "logps/rejected": -3.1182544231414795, "loss": 0.5429, "nll_loss": 0.509155809879303, "rewards/accuracies": 0.875, "rewards/chosen": -0.15678343176841736, "rewards/margins": 0.15504199266433716, "rewards/rejected": -0.3118254244327545, "step": 4662 }, { "epoch": 12.766598220396988, "grad_norm": 7.184711933135986, "learning_rate": 3.612328767123288e-07, "log_odds_chosen": 4.022948741912842, "log_odds_ratio": -0.19624540209770203, "logits/chosen": 0.9414620995521545, "logits/rejected": 0.8869246244430542, "logps/chosen": -1.6733940839767456, "logps/rejected": -5.55601167678833, "loss": 0.5622, "nll_loss": 0.5425287485122681, "rewards/accuracies": 0.875, "rewards/chosen": -0.16733941435813904, "rewards/margins": 0.3882617652416229, "rewards/rejected": -0.555601179599762, "step": 4663 }, { "epoch": 12.76933607118412, "grad_norm": 3.404411554336548, "learning_rate": 3.610958904109589e-07, "log_odds_chosen": 3.4919931888580322, "log_odds_ratio": -0.1753227710723877, "logits/chosen": 1.282761573791504, "logits/rejected": 1.2643113136291504, "logps/chosen": -1.9370074272155762, "logps/rejected": -5.310281276702881, "loss": 0.5877, "nll_loss": 0.5701258182525635, "rewards/accuracies": 1.0, "rewards/chosen": -0.19370074570178986, "rewards/margins": 0.33732742071151733, "rewards/rejected": -0.531028151512146, "step": 4664 }, { "epoch": 12.772073921971252, "grad_norm": 4.600424289703369, "learning_rate": 3.6095890410958904e-07, "log_odds_chosen": 1.8503496646881104, "log_odds_ratio": -0.46665528416633606, "logits/chosen": 1.0940238237380981, "logits/rejected": 0.9949157238006592, "logps/chosen": -1.751204252243042, "logps/rejected": -3.4996137619018555, "loss": 0.5037, "nll_loss": 0.4570498764514923, "rewards/accuracies": 0.875, "rewards/chosen": -0.17512041330337524, "rewards/margins": 0.17484094202518463, "rewards/rejected": -0.34996140003204346, "step": 4665 }, { "epoch": 12.774811772758385, "grad_norm": 3.7128210067749023, "learning_rate": 3.608219178082192e-07, "log_odds_chosen": 2.097322940826416, "log_odds_ratio": -0.22152869403362274, "logits/chosen": 1.118446707725525, "logits/rejected": 1.1299209594726562, "logps/chosen": -1.6623272895812988, "logps/rejected": -3.6304750442504883, "loss": 0.5092, "nll_loss": 0.4870607256889343, "rewards/accuracies": 1.0, "rewards/chosen": -0.16623272001743317, "rewards/margins": 0.19681477546691895, "rewards/rejected": -0.3630474805831909, "step": 4666 }, { "epoch": 12.777549623545516, "grad_norm": 3.9811410903930664, "learning_rate": 3.606849315068493e-07, "log_odds_chosen": 2.6783974170684814, "log_odds_ratio": -0.3259694278240204, "logits/chosen": 1.133389949798584, "logits/rejected": 1.1242815256118774, "logps/chosen": -1.9871782064437866, "logps/rejected": -4.425949573516846, "loss": 0.4767, "nll_loss": 0.44414064288139343, "rewards/accuracies": 0.875, "rewards/chosen": -0.19871783256530762, "rewards/margins": 0.2438771277666092, "rewards/rejected": -0.4425949454307556, "step": 4667 }, { "epoch": 12.780287474332649, "grad_norm": 3.779888153076172, "learning_rate": 3.6054794520547944e-07, "log_odds_chosen": 2.3276336193084717, "log_odds_ratio": -0.21104878187179565, "logits/chosen": 0.9899454712867737, "logits/rejected": 1.017367959022522, "logps/chosen": -2.014559268951416, "logps/rejected": -4.193706035614014, "loss": 0.6375, "nll_loss": 0.6163901686668396, "rewards/accuracies": 1.0, "rewards/chosen": -0.20145593583583832, "rewards/margins": 0.21791470050811768, "rewards/rejected": -0.4193706512451172, "step": 4668 }, { "epoch": 12.78302532511978, "grad_norm": 3.445197582244873, "learning_rate": 3.6041095890410954e-07, "log_odds_chosen": 4.135766506195068, "log_odds_ratio": -0.19635841250419617, "logits/chosen": 0.9906736016273499, "logits/rejected": 1.0171332359313965, "logps/chosen": -1.8347986936569214, "logps/rejected": -5.815502166748047, "loss": 0.625, "nll_loss": 0.6054139137268066, "rewards/accuracies": 0.875, "rewards/chosen": -0.18347987532615662, "rewards/margins": 0.398070365190506, "rewards/rejected": -0.5815502405166626, "step": 4669 }, { "epoch": 12.785763175906913, "grad_norm": 4.113996982574463, "learning_rate": 3.6027397260273974e-07, "log_odds_chosen": 3.0499320030212402, "log_odds_ratio": -0.12539619207382202, "logits/chosen": 1.041886329650879, "logits/rejected": 1.0724273920059204, "logps/chosen": -1.7069549560546875, "logps/rejected": -4.50388765335083, "loss": 0.5694, "nll_loss": 0.5568199157714844, "rewards/accuracies": 1.0, "rewards/chosen": -0.1706954836845398, "rewards/margins": 0.27969327569007874, "rewards/rejected": -0.4503887891769409, "step": 4670 }, { "epoch": 12.788501026694044, "grad_norm": 3.9121880531311035, "learning_rate": 3.6013698630136984e-07, "log_odds_chosen": 4.0825514793396, "log_odds_ratio": -0.1188436895608902, "logits/chosen": 1.0493489503860474, "logits/rejected": 1.0436605215072632, "logps/chosen": -1.708661675453186, "logps/rejected": -5.612832069396973, "loss": 0.561, "nll_loss": 0.5490802526473999, "rewards/accuracies": 1.0, "rewards/chosen": -0.1708661913871765, "rewards/margins": 0.39041706919670105, "rewards/rejected": -0.5612832307815552, "step": 4671 }, { "epoch": 12.791238877481177, "grad_norm": 3.924940586090088, "learning_rate": 3.6e-07, "log_odds_chosen": 2.5125157833099365, "log_odds_ratio": -0.23467060923576355, "logits/chosen": 0.9298422336578369, "logits/rejected": 0.8978074193000793, "logps/chosen": -2.089224100112915, "logps/rejected": -4.440062046051025, "loss": 0.5589, "nll_loss": 0.5354510545730591, "rewards/accuracies": 1.0, "rewards/chosen": -0.20892241597175598, "rewards/margins": 0.23508378863334656, "rewards/rejected": -0.44400620460510254, "step": 4672 }, { "epoch": 12.793976728268309, "grad_norm": 3.5249545574188232, "learning_rate": 3.5986301369863015e-07, "log_odds_chosen": 4.717895984649658, "log_odds_ratio": -0.10832089185714722, "logits/chosen": 1.1842031478881836, "logits/rejected": 1.1384007930755615, "logps/chosen": -1.6262025833129883, "logps/rejected": -6.1251373291015625, "loss": 0.5344, "nll_loss": 0.5235556364059448, "rewards/accuracies": 1.0, "rewards/chosen": -0.16262026131153107, "rewards/margins": 0.44989344477653503, "rewards/rejected": -0.6125137209892273, "step": 4673 }, { "epoch": 12.796714579055442, "grad_norm": 3.1796510219573975, "learning_rate": 3.5972602739726025e-07, "log_odds_chosen": 2.4601354598999023, "log_odds_ratio": -0.23463496565818787, "logits/chosen": 1.2481858730316162, "logits/rejected": 1.2380226850509644, "logps/chosen": -1.81404709815979, "logps/rejected": -4.127751350402832, "loss": 0.5788, "nll_loss": 0.555362343788147, "rewards/accuracies": 1.0, "rewards/chosen": -0.1814046949148178, "rewards/margins": 0.2313704937696457, "rewards/rejected": -0.4127751588821411, "step": 4674 }, { "epoch": 12.799452429842574, "grad_norm": 4.088352680206299, "learning_rate": 3.595890410958904e-07, "log_odds_chosen": 3.280214786529541, "log_odds_ratio": -0.1401938945055008, "logits/chosen": 1.0177260637283325, "logits/rejected": 1.0300216674804688, "logps/chosen": -1.5272412300109863, "logps/rejected": -4.560102462768555, "loss": 0.4945, "nll_loss": 0.4805033206939697, "rewards/accuracies": 1.0, "rewards/chosen": -0.15272413194179535, "rewards/margins": 0.3032861351966858, "rewards/rejected": -0.45601025223731995, "step": 4675 }, { "epoch": 12.802190280629706, "grad_norm": 3.5374250411987305, "learning_rate": 3.594520547945205e-07, "log_odds_chosen": 2.167041063308716, "log_odds_ratio": -0.27953672409057617, "logits/chosen": 0.8903053998947144, "logits/rejected": 0.8510960340499878, "logps/chosen": -2.1625163555145264, "logps/rejected": -4.2183074951171875, "loss": 0.4951, "nll_loss": 0.467155396938324, "rewards/accuracies": 1.0, "rewards/chosen": -0.21625164151191711, "rewards/margins": 0.20557910203933716, "rewards/rejected": -0.4218307137489319, "step": 4676 }, { "epoch": 12.804928131416839, "grad_norm": 3.6593377590179443, "learning_rate": 3.593150684931507e-07, "log_odds_chosen": 1.9439866542816162, "log_odds_ratio": -0.21612229943275452, "logits/chosen": 0.9595785140991211, "logits/rejected": 0.9060176014900208, "logps/chosen": -1.4986200332641602, "logps/rejected": -3.228199005126953, "loss": 0.5083, "nll_loss": 0.4866418242454529, "rewards/accuracies": 1.0, "rewards/chosen": -0.14986200630664825, "rewards/margins": 0.17295792698860168, "rewards/rejected": -0.32281994819641113, "step": 4677 }, { "epoch": 12.80766598220397, "grad_norm": 3.572343349456787, "learning_rate": 3.591780821917808e-07, "log_odds_chosen": 3.8514509201049805, "log_odds_ratio": -0.21109725534915924, "logits/chosen": 0.9618577361106873, "logits/rejected": 0.95846027135849, "logps/chosen": -1.8840252161026, "logps/rejected": -5.529493808746338, "loss": 0.5639, "nll_loss": 0.542800784111023, "rewards/accuracies": 1.0, "rewards/chosen": -0.18840253353118896, "rewards/margins": 0.36454689502716064, "rewards/rejected": -0.5529493689537048, "step": 4678 }, { "epoch": 12.810403832991103, "grad_norm": 3.700486421585083, "learning_rate": 3.5904109589041095e-07, "log_odds_chosen": 2.364753246307373, "log_odds_ratio": -0.26618140935897827, "logits/chosen": 1.1853554248809814, "logits/rejected": 1.1954822540283203, "logps/chosen": -1.6664994955062866, "logps/rejected": -3.6816048622131348, "loss": 0.4898, "nll_loss": 0.4631909132003784, "rewards/accuracies": 0.875, "rewards/chosen": -0.1666499525308609, "rewards/margins": 0.2015105038881302, "rewards/rejected": -0.3681604862213135, "step": 4679 }, { "epoch": 12.813141683778234, "grad_norm": 10.540091514587402, "learning_rate": 3.589041095890411e-07, "log_odds_chosen": 2.706575393676758, "log_odds_ratio": -0.5682463049888611, "logits/chosen": 0.9803000688552856, "logits/rejected": 0.9799544811248779, "logps/chosen": -2.6080143451690674, "logps/rejected": -5.214663505554199, "loss": 0.6321, "nll_loss": 0.5753183364868164, "rewards/accuracies": 0.875, "rewards/chosen": -0.26080143451690674, "rewards/margins": 0.2606649398803711, "rewards/rejected": -0.5214663743972778, "step": 4680 }, { "epoch": 12.815879534565367, "grad_norm": 3.224839687347412, "learning_rate": 3.587671232876712e-07, "log_odds_chosen": 2.2255935668945312, "log_odds_ratio": -0.17479307949543, "logits/chosen": 1.0903784036636353, "logits/rejected": 1.0788615942001343, "logps/chosen": -1.7229280471801758, "logps/rejected": -3.743964672088623, "loss": 0.4973, "nll_loss": 0.4798160493373871, "rewards/accuracies": 1.0, "rewards/chosen": -0.1722927987575531, "rewards/margins": 0.20210368931293488, "rewards/rejected": -0.37439650297164917, "step": 4681 }, { "epoch": 12.818617385352498, "grad_norm": 4.833329677581787, "learning_rate": 3.5863013698630136e-07, "log_odds_chosen": 4.837583541870117, "log_odds_ratio": -0.12214165925979614, "logits/chosen": 1.18876051902771, "logits/rejected": 1.1107370853424072, "logps/chosen": -2.037214756011963, "logps/rejected": -6.677191257476807, "loss": 0.6264, "nll_loss": 0.6141607761383057, "rewards/accuracies": 1.0, "rewards/chosen": -0.20372147858142853, "rewards/margins": 0.46399763226509094, "rewards/rejected": -0.6677191257476807, "step": 4682 }, { "epoch": 12.821355236139631, "grad_norm": 4.766346454620361, "learning_rate": 3.5849315068493146e-07, "log_odds_chosen": 1.0984212160110474, "log_odds_ratio": -0.4581996202468872, "logits/chosen": 1.079195499420166, "logits/rejected": 1.0588088035583496, "logps/chosen": -2.3750476837158203, "logps/rejected": -3.380066394805908, "loss": 0.6594, "nll_loss": 0.6135716438293457, "rewards/accuracies": 0.75, "rewards/chosen": -0.237504780292511, "rewards/margins": 0.1005018875002861, "rewards/rejected": -0.3380066752433777, "step": 4683 }, { "epoch": 12.824093086926762, "grad_norm": 3.351438522338867, "learning_rate": 3.5835616438356166e-07, "log_odds_chosen": 3.142439365386963, "log_odds_ratio": -0.2044178545475006, "logits/chosen": 0.9124481081962585, "logits/rejected": 0.8678059577941895, "logps/chosen": -1.6965925693511963, "logps/rejected": -4.607080936431885, "loss": 0.5827, "nll_loss": 0.5623006820678711, "rewards/accuracies": 1.0, "rewards/chosen": -0.16965925693511963, "rewards/margins": 0.2910488247871399, "rewards/rejected": -0.4607080817222595, "step": 4684 }, { "epoch": 12.826830937713895, "grad_norm": 3.4456045627593994, "learning_rate": 3.5821917808219176e-07, "log_odds_chosen": 2.613703727722168, "log_odds_ratio": -0.16088062524795532, "logits/chosen": 1.004105567932129, "logits/rejected": 0.7943190336227417, "logps/chosen": -1.4212206602096558, "logps/rejected": -3.789851427078247, "loss": 0.6065, "nll_loss": 0.5903855562210083, "rewards/accuracies": 1.0, "rewards/chosen": -0.1421220749616623, "rewards/margins": 0.23686310648918152, "rewards/rejected": -0.3789851665496826, "step": 4685 }, { "epoch": 12.829568788501026, "grad_norm": 3.6126129627227783, "learning_rate": 3.5808219178082186e-07, "log_odds_chosen": 2.4857306480407715, "log_odds_ratio": -0.16031861305236816, "logits/chosen": 0.877865731716156, "logits/rejected": 0.8783331513404846, "logps/chosen": -2.0841317176818848, "logps/rejected": -4.425113677978516, "loss": 0.5406, "nll_loss": 0.5245698690414429, "rewards/accuracies": 1.0, "rewards/chosen": -0.20841318368911743, "rewards/margins": 0.2340981811285019, "rewards/rejected": -0.44251134991645813, "step": 4686 }, { "epoch": 12.83230663928816, "grad_norm": 5.006785869598389, "learning_rate": 3.5794520547945206e-07, "log_odds_chosen": 1.6383956670761108, "log_odds_ratio": -0.5135102272033691, "logits/chosen": 0.9352384209632874, "logits/rejected": 0.8781840801239014, "logps/chosen": -1.8784321546554565, "logps/rejected": -3.30511474609375, "loss": 0.5009, "nll_loss": 0.4495714008808136, "rewards/accuracies": 0.75, "rewards/chosen": -0.1878432035446167, "rewards/margins": 0.14266827702522278, "rewards/rejected": -0.33051151037216187, "step": 4687 }, { "epoch": 12.83504449007529, "grad_norm": 7.4720892906188965, "learning_rate": 3.5780821917808216e-07, "log_odds_chosen": 1.9834344387054443, "log_odds_ratio": -0.3042331337928772, "logits/chosen": 1.4039859771728516, "logits/rejected": 1.3554925918579102, "logps/chosen": -2.4235150814056396, "logps/rejected": -4.29868745803833, "loss": 0.5491, "nll_loss": 0.5186992883682251, "rewards/accuracies": 0.875, "rewards/chosen": -0.24235153198242188, "rewards/margins": 0.1875171959400177, "rewards/rejected": -0.4298686981201172, "step": 4688 }, { "epoch": 12.837782340862423, "grad_norm": 4.238509178161621, "learning_rate": 3.576712328767123e-07, "log_odds_chosen": 6.804060459136963, "log_odds_ratio": -0.11742784082889557, "logits/chosen": 1.2153892517089844, "logits/rejected": 1.2677910327911377, "logps/chosen": -2.3341493606567383, "logps/rejected": -8.951286315917969, "loss": 0.6627, "nll_loss": 0.650967538356781, "rewards/accuracies": 1.0, "rewards/chosen": -0.23341494798660278, "rewards/margins": 0.661713719367981, "rewards/rejected": -0.8951287269592285, "step": 4689 }, { "epoch": 12.840520191649555, "grad_norm": 5.550442218780518, "learning_rate": 3.575342465753424e-07, "log_odds_chosen": 3.140650987625122, "log_odds_ratio": -0.21214169263839722, "logits/chosen": 1.243094563484192, "logits/rejected": 1.1749094724655151, "logps/chosen": -1.8987085819244385, "logps/rejected": -4.811830520629883, "loss": 0.6321, "nll_loss": 0.6108872294425964, "rewards/accuracies": 0.875, "rewards/chosen": -0.18987086415290833, "rewards/margins": 0.29131224751472473, "rewards/rejected": -0.48118311166763306, "step": 4690 }, { "epoch": 12.843258042436688, "grad_norm": 3.1908998489379883, "learning_rate": 3.573972602739726e-07, "log_odds_chosen": 2.8809869289398193, "log_odds_ratio": -0.2135162502527237, "logits/chosen": 0.9424746036529541, "logits/rejected": 0.9820972681045532, "logps/chosen": -1.8884897232055664, "logps/rejected": -4.600983142852783, "loss": 0.6084, "nll_loss": 0.5870862007141113, "rewards/accuracies": 1.0, "rewards/chosen": -0.18884897232055664, "rewards/margins": 0.27124935388565063, "rewards/rejected": -0.4600983262062073, "step": 4691 }, { "epoch": 12.845995893223819, "grad_norm": 4.892766952514648, "learning_rate": 3.572602739726027e-07, "log_odds_chosen": 4.914877891540527, "log_odds_ratio": -0.24501413106918335, "logits/chosen": 1.250448226928711, "logits/rejected": 1.3154940605163574, "logps/chosen": -2.2284226417541504, "logps/rejected": -6.942559242248535, "loss": 0.783, "nll_loss": 0.7585120797157288, "rewards/accuracies": 0.875, "rewards/chosen": -0.2228422462940216, "rewards/margins": 0.47141367197036743, "rewards/rejected": -0.6942559480667114, "step": 4692 }, { "epoch": 12.848733744010952, "grad_norm": 4.483701229095459, "learning_rate": 3.571232876712328e-07, "log_odds_chosen": 1.8216087818145752, "log_odds_ratio": -0.3490138649940491, "logits/chosen": 1.251530647277832, "logits/rejected": 1.2001914978027344, "logps/chosen": -1.8283417224884033, "logps/rejected": -3.4263718128204346, "loss": 0.584, "nll_loss": 0.5490553975105286, "rewards/accuracies": 0.875, "rewards/chosen": -0.18283416330814362, "rewards/margins": 0.15980301797389984, "rewards/rejected": -0.34263721108436584, "step": 4693 }, { "epoch": 12.851471594798083, "grad_norm": 8.42069149017334, "learning_rate": 3.56986301369863e-07, "log_odds_chosen": 1.6546168327331543, "log_odds_ratio": -0.38684093952178955, "logits/chosen": 1.0787701606750488, "logits/rejected": 1.0043513774871826, "logps/chosen": -2.3076932430267334, "logps/rejected": -3.814026117324829, "loss": 0.6404, "nll_loss": 0.6017276048660278, "rewards/accuracies": 0.875, "rewards/chosen": -0.2307693362236023, "rewards/margins": 0.150633305311203, "rewards/rejected": -0.3814026117324829, "step": 4694 }, { "epoch": 12.854209445585216, "grad_norm": 3.6830506324768066, "learning_rate": 3.568493150684931e-07, "log_odds_chosen": 3.8359415531158447, "log_odds_ratio": -0.1840055137872696, "logits/chosen": 1.0833113193511963, "logits/rejected": 1.1269792318344116, "logps/chosen": -1.9078936576843262, "logps/rejected": -5.528779029846191, "loss": 0.5743, "nll_loss": 0.5559259057044983, "rewards/accuracies": 0.875, "rewards/chosen": -0.1907893717288971, "rewards/margins": 0.36208853125572205, "rewards/rejected": -0.5528779029846191, "step": 4695 }, { "epoch": 12.856947296372347, "grad_norm": 3.4094014167785645, "learning_rate": 3.567123287671233e-07, "log_odds_chosen": 3.6848809719085693, "log_odds_ratio": -0.15859422087669373, "logits/chosen": 0.954304575920105, "logits/rejected": 0.9568483233451843, "logps/chosen": -1.8072434663772583, "logps/rejected": -5.328524589538574, "loss": 0.5854, "nll_loss": 0.5695334672927856, "rewards/accuracies": 1.0, "rewards/chosen": -0.1807243525981903, "rewards/margins": 0.35212811827659607, "rewards/rejected": -0.5328524708747864, "step": 4696 }, { "epoch": 12.85968514715948, "grad_norm": 4.005518436431885, "learning_rate": 3.5657534246575337e-07, "log_odds_chosen": 3.5250091552734375, "log_odds_ratio": -0.23652337491512299, "logits/chosen": 0.982682466506958, "logits/rejected": 0.9522494077682495, "logps/chosen": -1.3070054054260254, "logps/rejected": -4.566752910614014, "loss": 0.5139, "nll_loss": 0.4902957081794739, "rewards/accuracies": 1.0, "rewards/chosen": -0.13070052862167358, "rewards/margins": 0.325974702835083, "rewards/rejected": -0.456675261259079, "step": 4697 }, { "epoch": 12.862422997946611, "grad_norm": 6.86358642578125, "learning_rate": 3.564383561643836e-07, "log_odds_chosen": 1.0934290885925293, "log_odds_ratio": -0.5998486280441284, "logits/chosen": 0.8783560395240784, "logits/rejected": 0.8504554033279419, "logps/chosen": -2.140047550201416, "logps/rejected": -3.086097002029419, "loss": 0.5941, "nll_loss": 0.5340930223464966, "rewards/accuracies": 0.75, "rewards/chosen": -0.2140047699213028, "rewards/margins": 0.09460495412349701, "rewards/rejected": -0.3086097240447998, "step": 4698 }, { "epoch": 12.865160848733744, "grad_norm": 3.639620304107666, "learning_rate": 3.563013698630137e-07, "log_odds_chosen": 2.9072139263153076, "log_odds_ratio": -0.31741005182266235, "logits/chosen": 0.9079210758209229, "logits/rejected": 0.876876950263977, "logps/chosen": -2.047560453414917, "logps/rejected": -4.886053562164307, "loss": 0.5251, "nll_loss": 0.49332571029663086, "rewards/accuracies": 0.75, "rewards/chosen": -0.20475605130195618, "rewards/margins": 0.2838493287563324, "rewards/rejected": -0.4886053800582886, "step": 4699 }, { "epoch": 12.867898699520875, "grad_norm": 5.5725908279418945, "learning_rate": 3.561643835616438e-07, "log_odds_chosen": 3.3422298431396484, "log_odds_ratio": -0.13365556299686432, "logits/chosen": 0.8971673250198364, "logits/rejected": 0.8812924027442932, "logps/chosen": -2.2885775566101074, "logps/rejected": -5.509491920471191, "loss": 0.6456, "nll_loss": 0.6322157382965088, "rewards/accuracies": 1.0, "rewards/chosen": -0.22885775566101074, "rewards/margins": 0.3220914602279663, "rewards/rejected": -0.550949215888977, "step": 4700 }, { "epoch": 12.870636550308008, "grad_norm": 5.110595226287842, "learning_rate": 3.56027397260274e-07, "log_odds_chosen": 2.6549389362335205, "log_odds_ratio": -0.2759777903556824, "logits/chosen": 1.1563620567321777, "logits/rejected": 1.1357386112213135, "logps/chosen": -2.201890707015991, "logps/rejected": -4.670051574707031, "loss": 0.5563, "nll_loss": 0.5287376046180725, "rewards/accuracies": 0.875, "rewards/chosen": -0.22018907964229584, "rewards/margins": 0.24681605398654938, "rewards/rejected": -0.4670051336288452, "step": 4701 }, { "epoch": 12.873374401095141, "grad_norm": 3.3689768314361572, "learning_rate": 3.558904109589041e-07, "log_odds_chosen": 1.9764858484268188, "log_odds_ratio": -0.2073754370212555, "logits/chosen": 0.8595885038375854, "logits/rejected": 0.8023322820663452, "logps/chosen": -1.6716399192810059, "logps/rejected": -3.4262430667877197, "loss": 0.6191, "nll_loss": 0.5983539819717407, "rewards/accuracies": 1.0, "rewards/chosen": -0.16716399788856506, "rewards/margins": 0.1754603087902069, "rewards/rejected": -0.342624306678772, "step": 4702 }, { "epoch": 12.876112251882272, "grad_norm": 3.7619967460632324, "learning_rate": 3.5575342465753423e-07, "log_odds_chosen": 2.6854732036590576, "log_odds_ratio": -0.23868058621883392, "logits/chosen": 0.9110944867134094, "logits/rejected": 0.8977757692337036, "logps/chosen": -2.213230848312378, "logps/rejected": -4.750107765197754, "loss": 0.6123, "nll_loss": 0.588403046131134, "rewards/accuracies": 1.0, "rewards/chosen": -0.22132308781147003, "rewards/margins": 0.25368767976760864, "rewards/rejected": -0.47501078248023987, "step": 4703 }, { "epoch": 12.878850102669405, "grad_norm": 3.7661664485931396, "learning_rate": 3.556164383561644e-07, "log_odds_chosen": 1.071720838546753, "log_odds_ratio": -0.45857200026512146, "logits/chosen": 1.1420401334762573, "logits/rejected": 1.0745880603790283, "logps/chosen": -1.5261766910552979, "logps/rejected": -2.3835253715515137, "loss": 0.5618, "nll_loss": 0.515971839427948, "rewards/accuracies": 0.75, "rewards/chosen": -0.1526176631450653, "rewards/margins": 0.08573487401008606, "rewards/rejected": -0.23835253715515137, "step": 4704 }, { "epoch": 12.881587953456537, "grad_norm": 3.72587513923645, "learning_rate": 3.5547945205479454e-07, "log_odds_chosen": 2.451915740966797, "log_odds_ratio": -0.2347838133573532, "logits/chosen": 1.2604256868362427, "logits/rejected": 1.3133459091186523, "logps/chosen": -2.191903829574585, "logps/rejected": -4.54118537902832, "loss": 0.5202, "nll_loss": 0.4966932535171509, "rewards/accuracies": 1.0, "rewards/chosen": -0.21919038891792297, "rewards/margins": 0.23492813110351562, "rewards/rejected": -0.4541185200214386, "step": 4705 }, { "epoch": 12.88432580424367, "grad_norm": 3.371340751647949, "learning_rate": 3.5534246575342464e-07, "log_odds_chosen": 3.4973878860473633, "log_odds_ratio": -0.20077082514762878, "logits/chosen": 0.8089866042137146, "logits/rejected": 0.7807841897010803, "logps/chosen": -1.6282551288604736, "logps/rejected": -4.910622596740723, "loss": 0.61, "nll_loss": 0.5899214148521423, "rewards/accuracies": 1.0, "rewards/chosen": -0.16282550990581512, "rewards/margins": 0.32823672890663147, "rewards/rejected": -0.4910622537136078, "step": 4706 }, { "epoch": 12.8870636550308, "grad_norm": 5.595111846923828, "learning_rate": 3.5520547945205473e-07, "log_odds_chosen": 2.655411720275879, "log_odds_ratio": -0.24906788766384125, "logits/chosen": 1.0817493200302124, "logits/rejected": 1.0943776369094849, "logps/chosen": -3.2251534461975098, "logps/rejected": -5.767579078674316, "loss": 0.7357, "nll_loss": 0.7107570171356201, "rewards/accuracies": 0.875, "rewards/chosen": -0.3225153088569641, "rewards/margins": 0.25424259901046753, "rewards/rejected": -0.5767579078674316, "step": 4707 }, { "epoch": 12.889801505817934, "grad_norm": 3.824615240097046, "learning_rate": 3.5506849315068494e-07, "log_odds_chosen": 0.3650277256965637, "log_odds_ratio": -0.5480152368545532, "logits/chosen": 1.2981599569320679, "logits/rejected": 1.2455356121063232, "logps/chosen": -1.5765485763549805, "logps/rejected": -1.865756869316101, "loss": 0.5261, "nll_loss": 0.47130057215690613, "rewards/accuracies": 0.75, "rewards/chosen": -0.15765486657619476, "rewards/margins": 0.028920818120241165, "rewards/rejected": -0.18657568097114563, "step": 4708 }, { "epoch": 12.892539356605065, "grad_norm": 4.032596111297607, "learning_rate": 3.5493150684931504e-07, "log_odds_chosen": 3.6653945446014404, "log_odds_ratio": -0.39996737241744995, "logits/chosen": 0.9093742966651917, "logits/rejected": 0.8194547891616821, "logps/chosen": -2.572101593017578, "logps/rejected": -6.110998153686523, "loss": 0.6647, "nll_loss": 0.6247463822364807, "rewards/accuracies": 0.875, "rewards/chosen": -0.2572101354598999, "rewards/margins": 0.3538896441459656, "rewards/rejected": -0.6110998392105103, "step": 4709 }, { "epoch": 12.895277207392198, "grad_norm": 3.3439996242523193, "learning_rate": 3.547945205479452e-07, "log_odds_chosen": 2.3698818683624268, "log_odds_ratio": -0.2193332016468048, "logits/chosen": 0.6494489312171936, "logits/rejected": 0.5726642608642578, "logps/chosen": -1.5482280254364014, "logps/rejected": -3.7276270389556885, "loss": 0.5296, "nll_loss": 0.5076560974121094, "rewards/accuracies": 1.0, "rewards/chosen": -0.15482279658317566, "rewards/margins": 0.21793991327285767, "rewards/rejected": -0.3727627098560333, "step": 4710 }, { "epoch": 12.898015058179329, "grad_norm": 4.331001281738281, "learning_rate": 3.5465753424657534e-07, "log_odds_chosen": 3.138972282409668, "log_odds_ratio": -0.2412012219429016, "logits/chosen": 0.9792171716690063, "logits/rejected": 1.0059592723846436, "logps/chosen": -1.7656364440917969, "logps/rejected": -4.746399879455566, "loss": 0.5178, "nll_loss": 0.4936560094356537, "rewards/accuracies": 0.875, "rewards/chosen": -0.17656363546848297, "rewards/margins": 0.2980763614177704, "rewards/rejected": -0.47463998198509216, "step": 4711 }, { "epoch": 12.900752908966462, "grad_norm": 5.356046199798584, "learning_rate": 3.545205479452055e-07, "log_odds_chosen": 1.8203680515289307, "log_odds_ratio": -0.5318556427955627, "logits/chosen": 0.9189436435699463, "logits/rejected": 0.9085798263549805, "logps/chosen": -2.3926196098327637, "logps/rejected": -4.055367946624756, "loss": 0.6681, "nll_loss": 0.6149134635925293, "rewards/accuracies": 0.75, "rewards/chosen": -0.2392619550228119, "rewards/margins": 0.16627484560012817, "rewards/rejected": -0.40553680062294006, "step": 4712 }, { "epoch": 12.903490759753593, "grad_norm": 3.780331611633301, "learning_rate": 3.543835616438356e-07, "log_odds_chosen": 3.351195812225342, "log_odds_ratio": -0.17288731038570404, "logits/chosen": 1.0666851997375488, "logits/rejected": 1.0572971105575562, "logps/chosen": -1.9103155136108398, "logps/rejected": -5.053043842315674, "loss": 0.5918, "nll_loss": 0.5745266675949097, "rewards/accuracies": 1.0, "rewards/chosen": -0.1910315454006195, "rewards/margins": 0.31427282094955444, "rewards/rejected": -0.5053043961524963, "step": 4713 }, { "epoch": 12.906228610540726, "grad_norm": 4.273083209991455, "learning_rate": 3.542465753424657e-07, "log_odds_chosen": 2.4766337871551514, "log_odds_ratio": -0.2405424267053604, "logits/chosen": 0.9013018608093262, "logits/rejected": 0.846787691116333, "logps/chosen": -1.9083542823791504, "logps/rejected": -4.162769794464111, "loss": 0.5284, "nll_loss": 0.5043582320213318, "rewards/accuracies": 0.875, "rewards/chosen": -0.19083544611930847, "rewards/margins": 0.22544154524803162, "rewards/rejected": -0.4162769913673401, "step": 4714 }, { "epoch": 12.908966461327857, "grad_norm": 3.7603893280029297, "learning_rate": 3.541095890410959e-07, "log_odds_chosen": 2.9076907634735107, "log_odds_ratio": -0.23755817115306854, "logits/chosen": 1.0193904638290405, "logits/rejected": 1.0021123886108398, "logps/chosen": -1.9373738765716553, "logps/rejected": -4.741582870483398, "loss": 0.657, "nll_loss": 0.6332870721817017, "rewards/accuracies": 1.0, "rewards/chosen": -0.19373738765716553, "rewards/margins": 0.2804208993911743, "rewards/rejected": -0.47415828704833984, "step": 4715 }, { "epoch": 12.91170431211499, "grad_norm": 5.493532180786133, "learning_rate": 3.53972602739726e-07, "log_odds_chosen": 4.088974952697754, "log_odds_ratio": -0.2962762713432312, "logits/chosen": 0.9602793455123901, "logits/rejected": 0.9034870266914368, "logps/chosen": -1.932294249534607, "logps/rejected": -5.76694393157959, "loss": 0.6949, "nll_loss": 0.6653035283088684, "rewards/accuracies": 0.875, "rewards/chosen": -0.19322943687438965, "rewards/margins": 0.3834649324417114, "rewards/rejected": -0.5766943693161011, "step": 4716 }, { "epoch": 12.914442162902121, "grad_norm": 3.4798648357391357, "learning_rate": 3.5383561643835615e-07, "log_odds_chosen": 2.148719310760498, "log_odds_ratio": -0.2574400007724762, "logits/chosen": 0.8665121793746948, "logits/rejected": 0.7630525827407837, "logps/chosen": -1.6965901851654053, "logps/rejected": -3.672473192214966, "loss": 0.5372, "nll_loss": 0.5114277601242065, "rewards/accuracies": 1.0, "rewards/chosen": -0.16965901851654053, "rewards/margins": 0.19758829474449158, "rewards/rejected": -0.3672473132610321, "step": 4717 }, { "epoch": 12.917180013689254, "grad_norm": 6.926382541656494, "learning_rate": 3.536986301369863e-07, "log_odds_chosen": 1.3471683263778687, "log_odds_ratio": -0.45458680391311646, "logits/chosen": 1.209571123123169, "logits/rejected": 1.1614969968795776, "logps/chosen": -2.13795804977417, "logps/rejected": -3.371321678161621, "loss": 0.6181, "nll_loss": 0.5726792812347412, "rewards/accuracies": 0.75, "rewards/chosen": -0.21379581093788147, "rewards/margins": 0.12333633005619049, "rewards/rejected": -0.33713215589523315, "step": 4718 }, { "epoch": 12.919917864476385, "grad_norm": 3.92800235748291, "learning_rate": 3.5356164383561645e-07, "log_odds_chosen": 3.0654516220092773, "log_odds_ratio": -0.28882521390914917, "logits/chosen": 0.818554162979126, "logits/rejected": 0.8013070225715637, "logps/chosen": -2.407994270324707, "logps/rejected": -5.39326810836792, "loss": 0.5747, "nll_loss": 0.5458386540412903, "rewards/accuracies": 0.875, "rewards/chosen": -0.24079939723014832, "rewards/margins": 0.29852738976478577, "rewards/rejected": -0.5393267869949341, "step": 4719 }, { "epoch": 12.922655715263518, "grad_norm": 3.156193494796753, "learning_rate": 3.5342465753424655e-07, "log_odds_chosen": 3.408635139465332, "log_odds_ratio": -0.20795640349388123, "logits/chosen": 0.9055399298667908, "logits/rejected": 0.9177075028419495, "logps/chosen": -2.337728500366211, "logps/rejected": -5.645232200622559, "loss": 0.5898, "nll_loss": 0.5689684748649597, "rewards/accuracies": 0.875, "rewards/chosen": -0.2337728589773178, "rewards/margins": 0.33075037598609924, "rewards/rejected": -0.5645232200622559, "step": 4720 }, { "epoch": 12.92539356605065, "grad_norm": 8.59830093383789, "learning_rate": 3.5328767123287665e-07, "log_odds_chosen": 3.562138080596924, "log_odds_ratio": -0.11393444240093231, "logits/chosen": 1.2035760879516602, "logits/rejected": 1.2202553749084473, "logps/chosen": -1.8594481945037842, "logps/rejected": -5.202481746673584, "loss": 0.5792, "nll_loss": 0.5678389668464661, "rewards/accuracies": 1.0, "rewards/chosen": -0.1859448254108429, "rewards/margins": 0.3343033492565155, "rewards/rejected": -0.5202481746673584, "step": 4721 }, { "epoch": 12.928131416837783, "grad_norm": 3.686753749847412, "learning_rate": 3.5315068493150686e-07, "log_odds_chosen": 2.114935874938965, "log_odds_ratio": -0.197687566280365, "logits/chosen": 0.8580954074859619, "logits/rejected": 0.8725229501724243, "logps/chosen": -1.7627888917922974, "logps/rejected": -3.682920455932617, "loss": 0.4974, "nll_loss": 0.4776536822319031, "rewards/accuracies": 1.0, "rewards/chosen": -0.17627890408039093, "rewards/margins": 0.19201315939426422, "rewards/rejected": -0.36829206347465515, "step": 4722 }, { "epoch": 12.930869267624914, "grad_norm": 4.318070888519287, "learning_rate": 3.5301369863013696e-07, "log_odds_chosen": 2.7595152854919434, "log_odds_ratio": -0.1899004727602005, "logits/chosen": 1.115859031677246, "logits/rejected": 1.1649391651153564, "logps/chosen": -2.8957786560058594, "logps/rejected": -5.596608638763428, "loss": 0.6294, "nll_loss": 0.6104413866996765, "rewards/accuracies": 1.0, "rewards/chosen": -0.289577841758728, "rewards/margins": 0.2700830101966858, "rewards/rejected": -0.5596609115600586, "step": 4723 }, { "epoch": 12.933607118412047, "grad_norm": 6.314094066619873, "learning_rate": 3.528767123287671e-07, "log_odds_chosen": 1.9048027992248535, "log_odds_ratio": -0.35298895835876465, "logits/chosen": 0.9593639373779297, "logits/rejected": 1.0069234371185303, "logps/chosen": -2.8029837608337402, "logps/rejected": -4.622581958770752, "loss": 0.7737, "nll_loss": 0.7383675575256348, "rewards/accuracies": 0.75, "rewards/chosen": -0.2802983820438385, "rewards/margins": 0.1819598376750946, "rewards/rejected": -0.4622582197189331, "step": 4724 }, { "epoch": 12.936344969199178, "grad_norm": 3.756544828414917, "learning_rate": 3.5273972602739726e-07, "log_odds_chosen": 1.925673246383667, "log_odds_ratio": -0.27272728085517883, "logits/chosen": 1.1002119779586792, "logits/rejected": 1.0874378681182861, "logps/chosen": -1.4045392274856567, "logps/rejected": -3.1145901679992676, "loss": 0.4471, "nll_loss": 0.4198410212993622, "rewards/accuracies": 1.0, "rewards/chosen": -0.14045393466949463, "rewards/margins": 0.17100510001182556, "rewards/rejected": -0.3114590346813202, "step": 4725 }, { "epoch": 12.93908281998631, "grad_norm": 3.1971752643585205, "learning_rate": 3.526027397260274e-07, "log_odds_chosen": 3.7450289726257324, "log_odds_ratio": -0.09166799485683441, "logits/chosen": 0.9518078565597534, "logits/rejected": 0.9547038078308105, "logps/chosen": -2.4589362144470215, "logps/rejected": -6.075483322143555, "loss": 0.5428, "nll_loss": 0.533673882484436, "rewards/accuracies": 1.0, "rewards/chosen": -0.24589359760284424, "rewards/margins": 0.36165469884872437, "rewards/rejected": -0.6075483560562134, "step": 4726 }, { "epoch": 12.941820670773442, "grad_norm": 3.643839120864868, "learning_rate": 3.524657534246575e-07, "log_odds_chosen": 2.692807197570801, "log_odds_ratio": -0.23168262839317322, "logits/chosen": 1.1412779092788696, "logits/rejected": 1.1718974113464355, "logps/chosen": -1.9458777904510498, "logps/rejected": -4.531749248504639, "loss": 0.5514, "nll_loss": 0.5282515287399292, "rewards/accuracies": 0.875, "rewards/chosen": -0.19458778202533722, "rewards/margins": 0.25858715176582336, "rewards/rejected": -0.4531749486923218, "step": 4727 }, { "epoch": 12.944558521560575, "grad_norm": 6.449957370758057, "learning_rate": 3.523287671232876e-07, "log_odds_chosen": 3.0266566276550293, "log_odds_ratio": -0.29761770367622375, "logits/chosen": 0.9820335507392883, "logits/rejected": 0.9477713108062744, "logps/chosen": -2.4938316345214844, "logps/rejected": -5.297420024871826, "loss": 0.6709, "nll_loss": 0.6410905122756958, "rewards/accuracies": 0.875, "rewards/chosen": -0.24938316643238068, "rewards/margins": 0.28035882115364075, "rewards/rejected": -0.5297420024871826, "step": 4728 }, { "epoch": 12.947296372347708, "grad_norm": 2.91845703125, "learning_rate": 3.521917808219178e-07, "log_odds_chosen": 2.6973819732666016, "log_odds_ratio": -0.1245383769273758, "logits/chosen": 1.233410120010376, "logits/rejected": 1.189308524131775, "logps/chosen": -1.613034725189209, "logps/rejected": -4.084924221038818, "loss": 0.4853, "nll_loss": 0.47285139560699463, "rewards/accuracies": 1.0, "rewards/chosen": -0.16130346059799194, "rewards/margins": 0.24718895554542542, "rewards/rejected": -0.40849244594573975, "step": 4729 }, { "epoch": 12.950034223134839, "grad_norm": 3.89727783203125, "learning_rate": 3.520547945205479e-07, "log_odds_chosen": 4.200416088104248, "log_odds_ratio": -0.14143680036067963, "logits/chosen": 0.935198187828064, "logits/rejected": 0.919095516204834, "logps/chosen": -2.600260019302368, "logps/rejected": -6.634363174438477, "loss": 0.6064, "nll_loss": 0.5922847986221313, "rewards/accuracies": 1.0, "rewards/chosen": -0.2600260078907013, "rewards/margins": 0.40341031551361084, "rewards/rejected": -0.6634363532066345, "step": 4730 }, { "epoch": 12.952772073921972, "grad_norm": 6.977695941925049, "learning_rate": 3.5191780821917807e-07, "log_odds_chosen": 1.6083317995071411, "log_odds_ratio": -0.2907980680465698, "logits/chosen": 1.0043507814407349, "logits/rejected": 0.9380330443382263, "logps/chosen": -2.6312973499298096, "logps/rejected": -4.096837997436523, "loss": 0.6302, "nll_loss": 0.6010804176330566, "rewards/accuracies": 1.0, "rewards/chosen": -0.26312974095344543, "rewards/margins": 0.146554097533226, "rewards/rejected": -0.40968382358551025, "step": 4731 }, { "epoch": 12.955509924709103, "grad_norm": 4.171370983123779, "learning_rate": 3.517808219178082e-07, "log_odds_chosen": 3.540728807449341, "log_odds_ratio": -0.22816231846809387, "logits/chosen": 0.8733595609664917, "logits/rejected": 0.8823220729827881, "logps/chosen": -2.124854326248169, "logps/rejected": -5.512920379638672, "loss": 0.6506, "nll_loss": 0.627782940864563, "rewards/accuracies": 1.0, "rewards/chosen": -0.2124854326248169, "rewards/margins": 0.3388066291809082, "rewards/rejected": -0.5512920618057251, "step": 4732 }, { "epoch": 12.958247775496236, "grad_norm": 5.260117053985596, "learning_rate": 3.5164383561643837e-07, "log_odds_chosen": 3.0998001098632812, "log_odds_ratio": -0.12889176607131958, "logits/chosen": 1.0602399110794067, "logits/rejected": 1.1251401901245117, "logps/chosen": -2.573209762573242, "logps/rejected": -5.507064342498779, "loss": 0.6595, "nll_loss": 0.6466124653816223, "rewards/accuracies": 1.0, "rewards/chosen": -0.25732100009918213, "rewards/margins": 0.29338544607162476, "rewards/rejected": -0.5507064461708069, "step": 4733 }, { "epoch": 12.960985626283367, "grad_norm": 3.7840652465820312, "learning_rate": 3.5150684931506847e-07, "log_odds_chosen": 2.3103091716766357, "log_odds_ratio": -0.2860702872276306, "logits/chosen": 0.995725154876709, "logits/rejected": 1.0077311992645264, "logps/chosen": -1.7053134441375732, "logps/rejected": -3.8563036918640137, "loss": 0.5387, "nll_loss": 0.5101014375686646, "rewards/accuracies": 0.875, "rewards/chosen": -0.17053136229515076, "rewards/margins": 0.2150990068912506, "rewards/rejected": -0.38563039898872375, "step": 4734 }, { "epoch": 12.9637234770705, "grad_norm": 3.804986000061035, "learning_rate": 3.513698630136986e-07, "log_odds_chosen": 1.7225528955459595, "log_odds_ratio": -0.33916181325912476, "logits/chosen": 0.9476743936538696, "logits/rejected": 0.8341785669326782, "logps/chosen": -1.4123620986938477, "logps/rejected": -2.9737915992736816, "loss": 0.4872, "nll_loss": 0.4533216655254364, "rewards/accuracies": 0.875, "rewards/chosen": -0.14123621582984924, "rewards/margins": 0.1561429798603058, "rewards/rejected": -0.29737919569015503, "step": 4735 }, { "epoch": 12.966461327857632, "grad_norm": 4.351912021636963, "learning_rate": 3.512328767123288e-07, "log_odds_chosen": 1.8863669633865356, "log_odds_ratio": -0.26427415013313293, "logits/chosen": 0.8159360885620117, "logits/rejected": 0.7719509601593018, "logps/chosen": -1.7384626865386963, "logps/rejected": -3.4440407752990723, "loss": 0.6389, "nll_loss": 0.6124666333198547, "rewards/accuracies": 1.0, "rewards/chosen": -0.1738462746143341, "rewards/margins": 0.17055779695510864, "rewards/rejected": -0.34440410137176514, "step": 4736 }, { "epoch": 12.969199178644764, "grad_norm": 3.996521472930908, "learning_rate": 3.5109589041095887e-07, "log_odds_chosen": 4.051609039306641, "log_odds_ratio": -0.0451003760099411, "logits/chosen": 1.4440187215805054, "logits/rejected": 1.527895212173462, "logps/chosen": -1.9084175825119019, "logps/rejected": -5.7057952880859375, "loss": 0.5096, "nll_loss": 0.5050463676452637, "rewards/accuracies": 1.0, "rewards/chosen": -0.19084177911281586, "rewards/margins": 0.3797377943992615, "rewards/rejected": -0.5705795288085938, "step": 4737 }, { "epoch": 12.971937029431896, "grad_norm": 4.4129319190979, "learning_rate": 3.50958904109589e-07, "log_odds_chosen": 2.1880970001220703, "log_odds_ratio": -0.25071805715560913, "logits/chosen": 1.3229138851165771, "logits/rejected": 1.2591865062713623, "logps/chosen": -1.2520034313201904, "logps/rejected": -3.1981911659240723, "loss": 0.4195, "nll_loss": 0.3944416344165802, "rewards/accuracies": 1.0, "rewards/chosen": -0.12520034611225128, "rewards/margins": 0.19461876153945923, "rewards/rejected": -0.3198191225528717, "step": 4738 }, { "epoch": 12.974674880219029, "grad_norm": 3.6677820682525635, "learning_rate": 3.508219178082192e-07, "log_odds_chosen": 2.6144142150878906, "log_odds_ratio": -0.16289737820625305, "logits/chosen": 1.2007673978805542, "logits/rejected": 1.2380282878875732, "logps/chosen": -1.7423725128173828, "logps/rejected": -4.060366153717041, "loss": 0.4831, "nll_loss": 0.46685290336608887, "rewards/accuracies": 0.875, "rewards/chosen": -0.17423725128173828, "rewards/margins": 0.23179936408996582, "rewards/rejected": -0.4060366451740265, "step": 4739 }, { "epoch": 12.97741273100616, "grad_norm": 3.5474812984466553, "learning_rate": 3.506849315068493e-07, "log_odds_chosen": 0.9097676277160645, "log_odds_ratio": -0.42895519733428955, "logits/chosen": 1.2495396137237549, "logits/rejected": 1.2312167882919312, "logps/chosen": -1.1575182676315308, "logps/rejected": -1.8824256658554077, "loss": 0.4121, "nll_loss": 0.36925268173217773, "rewards/accuracies": 0.875, "rewards/chosen": -0.11575181782245636, "rewards/margins": 0.07249075174331665, "rewards/rejected": -0.188242569565773, "step": 4740 }, { "epoch": 12.980150581793293, "grad_norm": 3.9323513507843018, "learning_rate": 3.5054794520547943e-07, "log_odds_chosen": 1.573237657546997, "log_odds_ratio": -0.44324982166290283, "logits/chosen": 1.3309805393218994, "logits/rejected": 1.3468796014785767, "logps/chosen": -2.2151753902435303, "logps/rejected": -3.708284616470337, "loss": 0.5488, "nll_loss": 0.5045128464698792, "rewards/accuracies": 0.625, "rewards/chosen": -0.22151754796504974, "rewards/margins": 0.149310901761055, "rewards/rejected": -0.37082844972610474, "step": 4741 }, { "epoch": 12.982888432580424, "grad_norm": 3.63403582572937, "learning_rate": 3.504109589041096e-07, "log_odds_chosen": 3.834813117980957, "log_odds_ratio": -0.17387346923351288, "logits/chosen": 1.0643551349639893, "logits/rejected": 1.1116856336593628, "logps/chosen": -2.0694289207458496, "logps/rejected": -5.795650959014893, "loss": 0.7399, "nll_loss": 0.7225139141082764, "rewards/accuracies": 1.0, "rewards/chosen": -0.20694290101528168, "rewards/margins": 0.37262219190597534, "rewards/rejected": -0.5795651078224182, "step": 4742 }, { "epoch": 12.985626283367557, "grad_norm": 5.477021217346191, "learning_rate": 3.5027397260273973e-07, "log_odds_chosen": 3.310929298400879, "log_odds_ratio": -0.15968404710292816, "logits/chosen": 0.6721266508102417, "logits/rejected": 0.5904009342193604, "logps/chosen": -1.9431169033050537, "logps/rejected": -5.099587917327881, "loss": 0.6968, "nll_loss": 0.6808550357818604, "rewards/accuracies": 1.0, "rewards/chosen": -0.19431167840957642, "rewards/margins": 0.3156471252441406, "rewards/rejected": -0.509958803653717, "step": 4743 }, { "epoch": 12.988364134154688, "grad_norm": 3.327362298965454, "learning_rate": 3.5013698630136983e-07, "log_odds_chosen": 2.9048523902893066, "log_odds_ratio": -0.12201986461877823, "logits/chosen": 1.1090325117111206, "logits/rejected": 1.0585471391677856, "logps/chosen": -1.8193057775497437, "logps/rejected": -4.551512718200684, "loss": 0.5097, "nll_loss": 0.49751558899879456, "rewards/accuracies": 1.0, "rewards/chosen": -0.18193058669567108, "rewards/margins": 0.2732207179069519, "rewards/rejected": -0.4551513195037842, "step": 4744 }, { "epoch": 12.991101984941821, "grad_norm": 3.9670677185058594, "learning_rate": 3.5e-07, "log_odds_chosen": 1.47154700756073, "log_odds_ratio": -0.3120025098323822, "logits/chosen": 1.2200955152511597, "logits/rejected": 1.1391288042068481, "logps/chosen": -1.263708472251892, "logps/rejected": -2.518667221069336, "loss": 0.4414, "nll_loss": 0.41021695733070374, "rewards/accuracies": 1.0, "rewards/chosen": -0.1263708472251892, "rewards/margins": 0.12549588084220886, "rewards/rejected": -0.25186672806739807, "step": 4745 }, { "epoch": 12.993839835728952, "grad_norm": 4.074409484863281, "learning_rate": 3.4986301369863014e-07, "log_odds_chosen": 1.2516865730285645, "log_odds_ratio": -0.30558308959007263, "logits/chosen": 1.2309049367904663, "logits/rejected": 1.1978240013122559, "logps/chosen": -1.82441246509552, "logps/rejected": -2.89277982711792, "loss": 0.4848, "nll_loss": 0.45427972078323364, "rewards/accuracies": 1.0, "rewards/chosen": -0.18244124948978424, "rewards/margins": 0.10683673620223999, "rewards/rejected": -0.28927797079086304, "step": 4746 }, { "epoch": 12.996577686516085, "grad_norm": 3.3253753185272217, "learning_rate": 3.4972602739726024e-07, "log_odds_chosen": 3.854519844055176, "log_odds_ratio": -0.17761048674583435, "logits/chosen": 0.9584211707115173, "logits/rejected": 0.9019483923912048, "logps/chosen": -1.4443351030349731, "logps/rejected": -5.025186061859131, "loss": 0.5303, "nll_loss": 0.5125606656074524, "rewards/accuracies": 1.0, "rewards/chosen": -0.14443351328372955, "rewards/margins": 0.35808509588241577, "rewards/rejected": -0.5025185346603394, "step": 4747 }, { "epoch": 12.999315537303216, "grad_norm": 4.655368328094482, "learning_rate": 3.495890410958904e-07, "log_odds_chosen": 1.6545941829681396, "log_odds_ratio": -0.333394855260849, "logits/chosen": 1.1111564636230469, "logits/rejected": 1.1097906827926636, "logps/chosen": -2.3044567108154297, "logps/rejected": -3.7710983753204346, "loss": 0.5404, "nll_loss": 0.50705486536026, "rewards/accuracies": 0.875, "rewards/chosen": -0.23044565320014954, "rewards/margins": 0.14666420221328735, "rewards/rejected": -0.3771098256111145, "step": 4748 }, { "epoch": 13.00205338809035, "grad_norm": 3.732607126235962, "learning_rate": 3.4945205479452054e-07, "log_odds_chosen": 3.2342801094055176, "log_odds_ratio": -0.20353879034519196, "logits/chosen": 0.9199126362800598, "logits/rejected": 0.8629869222640991, "logps/chosen": -1.9844162464141846, "logps/rejected": -5.080039024353027, "loss": 0.6664, "nll_loss": 0.6460298299789429, "rewards/accuracies": 0.875, "rewards/chosen": -0.19844162464141846, "rewards/margins": 0.3095622658729553, "rewards/rejected": -0.5080038905143738, "step": 4749 }, { "epoch": 13.00479123887748, "grad_norm": 3.9670135974884033, "learning_rate": 3.493150684931507e-07, "log_odds_chosen": 1.9885154962539673, "log_odds_ratio": -0.19118361175060272, "logits/chosen": 0.9942947626113892, "logits/rejected": 0.9885481595993042, "logps/chosen": -2.1181416511535645, "logps/rejected": -3.9282102584838867, "loss": 0.5343, "nll_loss": 0.5151993036270142, "rewards/accuracies": 1.0, "rewards/chosen": -0.21181418001651764, "rewards/margins": 0.18100686371326447, "rewards/rejected": -0.3928210437297821, "step": 4750 }, { "epoch": 13.007529089664613, "grad_norm": 4.147588729858398, "learning_rate": 3.491780821917808e-07, "log_odds_chosen": 2.0066654682159424, "log_odds_ratio": -0.23562416434288025, "logits/chosen": 1.0014406442642212, "logits/rejected": 0.9879931211471558, "logps/chosen": -2.4186038970947266, "logps/rejected": -4.317048072814941, "loss": 0.5786, "nll_loss": 0.5550848245620728, "rewards/accuracies": 0.875, "rewards/chosen": -0.24186037480831146, "rewards/margins": 0.18984436988830566, "rewards/rejected": -0.4317047595977783, "step": 4751 }, { "epoch": 13.010266940451745, "grad_norm": 4.810150146484375, "learning_rate": 3.4904109589041094e-07, "log_odds_chosen": 1.4593886137008667, "log_odds_ratio": -0.2468247264623642, "logits/chosen": 0.9953947067260742, "logits/rejected": 0.9506924748420715, "logps/chosen": -1.919925570487976, "logps/rejected": -3.21213698387146, "loss": 0.6449, "nll_loss": 0.6201764345169067, "rewards/accuracies": 1.0, "rewards/chosen": -0.19199255108833313, "rewards/margins": 0.12922117114067078, "rewards/rejected": -0.3212137222290039, "step": 4752 }, { "epoch": 13.013004791238878, "grad_norm": 3.428790330886841, "learning_rate": 3.489041095890411e-07, "log_odds_chosen": 2.5015392303466797, "log_odds_ratio": -0.2824097275733948, "logits/chosen": 0.9759702682495117, "logits/rejected": 0.945749044418335, "logps/chosen": -1.5810706615447998, "logps/rejected": -3.868372678756714, "loss": 0.4511, "nll_loss": 0.4228995740413666, "rewards/accuracies": 0.875, "rewards/chosen": -0.15810707211494446, "rewards/margins": 0.2287302017211914, "rewards/rejected": -0.38683727383613586, "step": 4753 }, { "epoch": 13.015742642026009, "grad_norm": 3.4248287677764893, "learning_rate": 3.487671232876712e-07, "log_odds_chosen": 3.1904361248016357, "log_odds_ratio": -0.05516945570707321, "logits/chosen": 1.069386601448059, "logits/rejected": 1.1067683696746826, "logps/chosen": -2.012953519821167, "logps/rejected": -5.01685094833374, "loss": 0.5571, "nll_loss": 0.551542341709137, "rewards/accuracies": 1.0, "rewards/chosen": -0.2012953758239746, "rewards/margins": 0.30038970708847046, "rewards/rejected": -0.5016850829124451, "step": 4754 }, { "epoch": 13.018480492813142, "grad_norm": 4.082083702087402, "learning_rate": 3.4863013698630135e-07, "log_odds_chosen": 3.2764229774475098, "log_odds_ratio": -0.22919383645057678, "logits/chosen": 0.5988157987594604, "logits/rejected": 0.5334019064903259, "logps/chosen": -1.315492868423462, "logps/rejected": -4.354197978973389, "loss": 0.7081, "nll_loss": 0.6851719617843628, "rewards/accuracies": 1.0, "rewards/chosen": -0.13154928386211395, "rewards/margins": 0.3038705289363861, "rewards/rejected": -0.43541979789733887, "step": 4755 }, { "epoch": 13.021218343600275, "grad_norm": 3.535956859588623, "learning_rate": 3.484931506849315e-07, "log_odds_chosen": 2.8307862281799316, "log_odds_ratio": -0.08831761032342911, "logits/chosen": 1.2633273601531982, "logits/rejected": 1.3182456493377686, "logps/chosen": -1.9603891372680664, "logps/rejected": -4.5086565017700195, "loss": 0.4566, "nll_loss": 0.4478110074996948, "rewards/accuracies": 1.0, "rewards/chosen": -0.19603893160820007, "rewards/margins": 0.25482669472694397, "rewards/rejected": -0.45086565613746643, "step": 4756 }, { "epoch": 13.023956194387406, "grad_norm": 3.224583625793457, "learning_rate": 3.4835616438356165e-07, "log_odds_chosen": 3.365614414215088, "log_odds_ratio": -0.17824509739875793, "logits/chosen": 0.9779905080795288, "logits/rejected": 0.9922679662704468, "logps/chosen": -1.8419649600982666, "logps/rejected": -5.030429840087891, "loss": 0.582, "nll_loss": 0.5641587972640991, "rewards/accuracies": 1.0, "rewards/chosen": -0.18419650197029114, "rewards/margins": 0.31884652376174927, "rewards/rejected": -0.503042995929718, "step": 4757 }, { "epoch": 13.026694045174539, "grad_norm": 3.4329493045806885, "learning_rate": 3.4821917808219175e-07, "log_odds_chosen": 4.900217533111572, "log_odds_ratio": -0.1416093409061432, "logits/chosen": 0.966947615146637, "logits/rejected": 0.9783244729042053, "logps/chosen": -1.4415180683135986, "logps/rejected": -6.064336776733398, "loss": 0.6033, "nll_loss": 0.589115560054779, "rewards/accuracies": 0.875, "rewards/chosen": -0.14415180683135986, "rewards/margins": 0.46228185296058655, "rewards/rejected": -0.6064336895942688, "step": 4758 }, { "epoch": 13.02943189596167, "grad_norm": 3.3589260578155518, "learning_rate": 3.480821917808219e-07, "log_odds_chosen": 2.7574691772460938, "log_odds_ratio": -0.16875465214252472, "logits/chosen": 0.9142718315124512, "logits/rejected": 0.9104800224304199, "logps/chosen": -1.6274725198745728, "logps/rejected": -4.139548301696777, "loss": 0.4821, "nll_loss": 0.4652692675590515, "rewards/accuracies": 1.0, "rewards/chosen": -0.16274724900722504, "rewards/margins": 0.251207560300827, "rewards/rejected": -0.41395485401153564, "step": 4759 }, { "epoch": 13.032169746748803, "grad_norm": 3.966855049133301, "learning_rate": 3.4794520547945205e-07, "log_odds_chosen": 1.851987600326538, "log_odds_ratio": -0.2276376187801361, "logits/chosen": 0.7178213596343994, "logits/rejected": 0.651530385017395, "logps/chosen": -1.6215119361877441, "logps/rejected": -3.3225016593933105, "loss": 0.6374, "nll_loss": 0.6146694421768188, "rewards/accuracies": 1.0, "rewards/chosen": -0.16215120255947113, "rewards/margins": 0.1700989454984665, "rewards/rejected": -0.3322501480579376, "step": 4760 }, { "epoch": 13.034907597535934, "grad_norm": 3.617490530014038, "learning_rate": 3.4780821917808215e-07, "log_odds_chosen": 3.3540537357330322, "log_odds_ratio": -0.11824528127908707, "logits/chosen": 0.9505715370178223, "logits/rejected": 0.9145570993423462, "logps/chosen": -1.5771198272705078, "logps/rejected": -4.643996715545654, "loss": 0.5455, "nll_loss": 0.5336763858795166, "rewards/accuracies": 1.0, "rewards/chosen": -0.15771198272705078, "rewards/margins": 0.30668771266937256, "rewards/rejected": -0.46439966559410095, "step": 4761 }, { "epoch": 13.037645448323067, "grad_norm": 3.9537079334259033, "learning_rate": 3.476712328767123e-07, "log_odds_chosen": 3.8133716583251953, "log_odds_ratio": -0.14210185408592224, "logits/chosen": 0.8201384544372559, "logits/rejected": 0.7720761299133301, "logps/chosen": -1.7287706136703491, "logps/rejected": -5.362358093261719, "loss": 0.5994, "nll_loss": 0.5851624608039856, "rewards/accuracies": 1.0, "rewards/chosen": -0.17287707328796387, "rewards/margins": 0.363358736038208, "rewards/rejected": -0.5362358093261719, "step": 4762 }, { "epoch": 13.040383299110198, "grad_norm": 3.289590835571289, "learning_rate": 3.4753424657534246e-07, "log_odds_chosen": 3.356945514678955, "log_odds_ratio": -0.1629091203212738, "logits/chosen": 0.9530794024467468, "logits/rejected": 0.8951296806335449, "logps/chosen": -1.7887816429138184, "logps/rejected": -4.9891557693481445, "loss": 0.5319, "nll_loss": 0.5155914425849915, "rewards/accuracies": 1.0, "rewards/chosen": -0.17887815833091736, "rewards/margins": 0.3200374245643616, "rewards/rejected": -0.49891558289527893, "step": 4763 }, { "epoch": 13.043121149897331, "grad_norm": 3.8685033321380615, "learning_rate": 3.473972602739726e-07, "log_odds_chosen": 2.4334030151367188, "log_odds_ratio": -0.21789059042930603, "logits/chosen": 1.2245713472366333, "logits/rejected": 1.2474913597106934, "logps/chosen": -1.4487872123718262, "logps/rejected": -3.644232988357544, "loss": 0.6117, "nll_loss": 0.5899604558944702, "rewards/accuracies": 1.0, "rewards/chosen": -0.14487873017787933, "rewards/margins": 0.21954457461833954, "rewards/rejected": -0.3644232749938965, "step": 4764 }, { "epoch": 13.045859000684462, "grad_norm": 4.232178211212158, "learning_rate": 3.472602739726027e-07, "log_odds_chosen": 3.4243996143341064, "log_odds_ratio": -0.1343931257724762, "logits/chosen": 0.9643816947937012, "logits/rejected": 0.8997194170951843, "logps/chosen": -2.0061843395233154, "logps/rejected": -5.252879619598389, "loss": 0.7313, "nll_loss": 0.7178639769554138, "rewards/accuracies": 1.0, "rewards/chosen": -0.2006184458732605, "rewards/margins": 0.3246695399284363, "rewards/rejected": -0.5252879858016968, "step": 4765 }, { "epoch": 13.048596851471595, "grad_norm": 4.011425495147705, "learning_rate": 3.4712328767123286e-07, "log_odds_chosen": 1.7458271980285645, "log_odds_ratio": -0.3266899883747101, "logits/chosen": 1.1559940576553345, "logits/rejected": 1.1473740339279175, "logps/chosen": -1.8103622198104858, "logps/rejected": -3.3637375831604004, "loss": 0.5462, "nll_loss": 0.513486385345459, "rewards/accuracies": 0.75, "rewards/chosen": -0.18103621900081635, "rewards/margins": 0.15533751249313354, "rewards/rejected": -0.3363737463951111, "step": 4766 }, { "epoch": 13.051334702258726, "grad_norm": 3.4720799922943115, "learning_rate": 3.46986301369863e-07, "log_odds_chosen": 2.159813404083252, "log_odds_ratio": -0.2551431655883789, "logits/chosen": 0.8369801640510559, "logits/rejected": 0.8113940358161926, "logps/chosen": -2.15808367729187, "logps/rejected": -4.140432357788086, "loss": 0.5656, "nll_loss": 0.5401293039321899, "rewards/accuracies": 0.875, "rewards/chosen": -0.21580837666988373, "rewards/margins": 0.19823482632637024, "rewards/rejected": -0.41404321789741516, "step": 4767 }, { "epoch": 13.05407255304586, "grad_norm": 4.402844429016113, "learning_rate": 3.468493150684931e-07, "log_odds_chosen": 2.771214485168457, "log_odds_ratio": -0.2202441692352295, "logits/chosen": 0.9912612438201904, "logits/rejected": 0.9959033727645874, "logps/chosen": -2.242947578430176, "logps/rejected": -4.908690452575684, "loss": 0.711, "nll_loss": 0.6889395713806152, "rewards/accuracies": 1.0, "rewards/chosen": -0.2242947816848755, "rewards/margins": 0.26657429337501526, "rewards/rejected": -0.49086907505989075, "step": 4768 }, { "epoch": 13.05681040383299, "grad_norm": 3.7853500843048096, "learning_rate": 3.4671232876712326e-07, "log_odds_chosen": 1.467501163482666, "log_odds_ratio": -0.3290792405605316, "logits/chosen": 1.2039780616760254, "logits/rejected": 1.191365122795105, "logps/chosen": -2.133175849914551, "logps/rejected": -3.4953808784484863, "loss": 0.5513, "nll_loss": 0.5183639526367188, "rewards/accuracies": 1.0, "rewards/chosen": -0.21331758797168732, "rewards/margins": 0.1362205147743225, "rewards/rejected": -0.34953808784484863, "step": 4769 }, { "epoch": 13.059548254620124, "grad_norm": 3.8255131244659424, "learning_rate": 3.465753424657534e-07, "log_odds_chosen": 2.929098606109619, "log_odds_ratio": -0.19266094267368317, "logits/chosen": 0.8529976010322571, "logits/rejected": 0.901221513748169, "logps/chosen": -2.2441227436065674, "logps/rejected": -5.078017234802246, "loss": 0.6384, "nll_loss": 0.6191750168800354, "rewards/accuracies": 1.0, "rewards/chosen": -0.22441227734088898, "rewards/margins": 0.2833894193172455, "rewards/rejected": -0.5078016519546509, "step": 4770 }, { "epoch": 13.062286105407255, "grad_norm": 4.560791492462158, "learning_rate": 3.4643835616438357e-07, "log_odds_chosen": 3.5602169036865234, "log_odds_ratio": -0.3118230104446411, "logits/chosen": 1.1508064270019531, "logits/rejected": 1.1779112815856934, "logps/chosen": -2.7434282302856445, "logps/rejected": -6.177454948425293, "loss": 0.6756, "nll_loss": 0.6444311738014221, "rewards/accuracies": 0.75, "rewards/chosen": -0.2743428349494934, "rewards/margins": 0.3434026539325714, "rewards/rejected": -0.6177455186843872, "step": 4771 }, { "epoch": 13.065023956194388, "grad_norm": 3.281214952468872, "learning_rate": 3.4630136986301367e-07, "log_odds_chosen": 2.269202709197998, "log_odds_ratio": -0.16622696816921234, "logits/chosen": 1.3031609058380127, "logits/rejected": 1.2559417486190796, "logps/chosen": -1.389878273010254, "logps/rejected": -3.410391330718994, "loss": 0.4719, "nll_loss": 0.4553058445453644, "rewards/accuracies": 1.0, "rewards/chosen": -0.13898783922195435, "rewards/margins": 0.2020513117313385, "rewards/rejected": -0.34103912115097046, "step": 4772 }, { "epoch": 13.067761806981519, "grad_norm": 3.537447690963745, "learning_rate": 3.4616438356164387e-07, "log_odds_chosen": 3.293806552886963, "log_odds_ratio": -0.09345187246799469, "logits/chosen": 0.9531840085983276, "logits/rejected": 0.8720447421073914, "logps/chosen": -1.2810766696929932, "logps/rejected": -4.17893123626709, "loss": 0.5164, "nll_loss": 0.5070270299911499, "rewards/accuracies": 1.0, "rewards/chosen": -0.12810766696929932, "rewards/margins": 0.2897854745388031, "rewards/rejected": -0.4178931415081024, "step": 4773 }, { "epoch": 13.070499657768652, "grad_norm": 4.292027473449707, "learning_rate": 3.4602739726027397e-07, "log_odds_chosen": 1.9839701652526855, "log_odds_ratio": -0.46410632133483887, "logits/chosen": 0.8574694991111755, "logits/rejected": 0.8324831128120422, "logps/chosen": -2.3235971927642822, "logps/rejected": -4.186975002288818, "loss": 0.5365, "nll_loss": 0.49009186029434204, "rewards/accuracies": 0.875, "rewards/chosen": -0.23235970735549927, "rewards/margins": 0.18633782863616943, "rewards/rejected": -0.4186975359916687, "step": 4774 }, { "epoch": 13.073237508555783, "grad_norm": 7.955894470214844, "learning_rate": 3.4589041095890407e-07, "log_odds_chosen": 0.8024277687072754, "log_odds_ratio": -0.5770679712295532, "logits/chosen": 1.078728437423706, "logits/rejected": 1.088189959526062, "logps/chosen": -2.5185840129852295, "logps/rejected": -3.1884002685546875, "loss": 0.5708, "nll_loss": 0.5130640864372253, "rewards/accuracies": 0.75, "rewards/chosen": -0.2518584132194519, "rewards/margins": 0.06698162853717804, "rewards/rejected": -0.31884002685546875, "step": 4775 }, { "epoch": 13.075975359342916, "grad_norm": 9.949579238891602, "learning_rate": 3.457534246575342e-07, "log_odds_chosen": 1.093379020690918, "log_odds_ratio": -0.5229652523994446, "logits/chosen": 1.007798194885254, "logits/rejected": 0.9418596625328064, "logps/chosen": -2.6970951557159424, "logps/rejected": -3.6495180130004883, "loss": 0.5206, "nll_loss": 0.4682854413986206, "rewards/accuracies": 0.875, "rewards/chosen": -0.2697095274925232, "rewards/margins": 0.09524230659008026, "rewards/rejected": -0.36495181918144226, "step": 4776 }, { "epoch": 13.078713210130047, "grad_norm": 3.5259861946105957, "learning_rate": 3.456164383561644e-07, "log_odds_chosen": 2.0024375915527344, "log_odds_ratio": -0.2075933814048767, "logits/chosen": 1.1169397830963135, "logits/rejected": 1.1691337823867798, "logps/chosen": -1.7385060787200928, "logps/rejected": -3.480093479156494, "loss": 0.4988, "nll_loss": 0.4780304431915283, "rewards/accuracies": 1.0, "rewards/chosen": -0.1738506257534027, "rewards/margins": 0.1741587072610855, "rewards/rejected": -0.34800928831100464, "step": 4777 }, { "epoch": 13.08145106091718, "grad_norm": 3.6898748874664307, "learning_rate": 3.454794520547945e-07, "log_odds_chosen": 3.2471837997436523, "log_odds_ratio": -0.20365945994853973, "logits/chosen": 1.207969307899475, "logits/rejected": 1.2491706609725952, "logps/chosen": -2.0438928604125977, "logps/rejected": -5.157022476196289, "loss": 0.609, "nll_loss": 0.5886695981025696, "rewards/accuracies": 0.875, "rewards/chosen": -0.2043893039226532, "rewards/margins": 0.3113129436969757, "rewards/rejected": -0.5157022476196289, "step": 4778 }, { "epoch": 13.084188911704311, "grad_norm": 3.5144236087799072, "learning_rate": 3.453424657534246e-07, "log_odds_chosen": 1.6050746440887451, "log_odds_ratio": -0.24901652336120605, "logits/chosen": 1.0568182468414307, "logits/rejected": 0.9877929091453552, "logps/chosen": -1.2948782444000244, "logps/rejected": -2.687041997909546, "loss": 0.4601, "nll_loss": 0.4352068305015564, "rewards/accuracies": 1.0, "rewards/chosen": -0.12948782742023468, "rewards/margins": 0.1392163783311844, "rewards/rejected": -0.26870420575141907, "step": 4779 }, { "epoch": 13.086926762491444, "grad_norm": 5.147400379180908, "learning_rate": 3.4520547945205483e-07, "log_odds_chosen": 1.8111748695373535, "log_odds_ratio": -0.24412815272808075, "logits/chosen": 1.0341465473175049, "logits/rejected": 0.9415433406829834, "logps/chosen": -1.57588791847229, "logps/rejected": -3.1585588455200195, "loss": 0.5189, "nll_loss": 0.4944736659526825, "rewards/accuracies": 1.0, "rewards/chosen": -0.15758879482746124, "rewards/margins": 0.158267080783844, "rewards/rejected": -0.31585589051246643, "step": 4780 }, { "epoch": 13.089664613278575, "grad_norm": 4.6142988204956055, "learning_rate": 3.4506849315068493e-07, "log_odds_chosen": 2.348933696746826, "log_odds_ratio": -0.24493111670017242, "logits/chosen": 1.1005905866622925, "logits/rejected": 1.1187031269073486, "logps/chosen": -2.2452735900878906, "logps/rejected": -4.420691967010498, "loss": 0.51, "nll_loss": 0.4855051636695862, "rewards/accuracies": 1.0, "rewards/chosen": -0.22452735900878906, "rewards/margins": 0.2175418734550476, "rewards/rejected": -0.4420692026615143, "step": 4781 }, { "epoch": 13.092402464065708, "grad_norm": 3.7714040279388428, "learning_rate": 3.4493150684931503e-07, "log_odds_chosen": 2.4086532592773438, "log_odds_ratio": -0.24105417728424072, "logits/chosen": 0.8364851474761963, "logits/rejected": 0.8169612288475037, "logps/chosen": -2.025425910949707, "logps/rejected": -4.225749969482422, "loss": 0.5511, "nll_loss": 0.526947021484375, "rewards/accuracies": 0.875, "rewards/chosen": -0.20254257321357727, "rewards/margins": 0.22003242373466492, "rewards/rejected": -0.4225749969482422, "step": 4782 }, { "epoch": 13.095140314852841, "grad_norm": 3.619715452194214, "learning_rate": 3.447945205479452e-07, "log_odds_chosen": 2.5269579887390137, "log_odds_ratio": -0.24558907747268677, "logits/chosen": 0.92718505859375, "logits/rejected": 0.8296713829040527, "logps/chosen": -1.8029968738555908, "logps/rejected": -4.117222785949707, "loss": 0.5361, "nll_loss": 0.5115861892700195, "rewards/accuracies": 0.875, "rewards/chosen": -0.18029969930648804, "rewards/margins": 0.231422558426857, "rewards/rejected": -0.41172224283218384, "step": 4783 }, { "epoch": 13.097878165639973, "grad_norm": 3.3535377979278564, "learning_rate": 3.4465753424657533e-07, "log_odds_chosen": 2.0828237533569336, "log_odds_ratio": -0.24728909134864807, "logits/chosen": 0.8145296573638916, "logits/rejected": 0.7717151641845703, "logps/chosen": -1.993973731994629, "logps/rejected": -3.9528613090515137, "loss": 0.5519, "nll_loss": 0.5271583795547485, "rewards/accuracies": 0.875, "rewards/chosen": -0.19939737021923065, "rewards/margins": 0.19588878750801086, "rewards/rejected": -0.3952861726284027, "step": 4784 }, { "epoch": 13.100616016427105, "grad_norm": 3.3650565147399902, "learning_rate": 3.445205479452055e-07, "log_odds_chosen": 3.9479236602783203, "log_odds_ratio": -0.14268732070922852, "logits/chosen": 0.9232207536697388, "logits/rejected": 0.9092217683792114, "logps/chosen": -1.6930186748504639, "logps/rejected": -5.450688362121582, "loss": 0.5565, "nll_loss": 0.542273759841919, "rewards/accuracies": 1.0, "rewards/chosen": -0.1693018674850464, "rewards/margins": 0.37576693296432495, "rewards/rejected": -0.5450688600540161, "step": 4785 }, { "epoch": 13.103353867214237, "grad_norm": 3.9731173515319824, "learning_rate": 3.443835616438356e-07, "log_odds_chosen": 2.712494373321533, "log_odds_ratio": -0.18013346195220947, "logits/chosen": 1.155020833015442, "logits/rejected": 1.2521464824676514, "logps/chosen": -1.9914149045944214, "logps/rejected": -4.541020393371582, "loss": 0.5993, "nll_loss": 0.5813275575637817, "rewards/accuracies": 1.0, "rewards/chosen": -0.1991415023803711, "rewards/margins": 0.2549605369567871, "rewards/rejected": -0.454102098941803, "step": 4786 }, { "epoch": 13.10609171800137, "grad_norm": 3.7281339168548584, "learning_rate": 3.4424657534246574e-07, "log_odds_chosen": 2.4172134399414062, "log_odds_ratio": -0.23632651567459106, "logits/chosen": 1.2370691299438477, "logits/rejected": 1.2325992584228516, "logps/chosen": -1.2828395366668701, "logps/rejected": -3.3368353843688965, "loss": 0.5143, "nll_loss": 0.4906545877456665, "rewards/accuracies": 1.0, "rewards/chosen": -0.12828396260738373, "rewards/margins": 0.2053995579481125, "rewards/rejected": -0.3336835503578186, "step": 4787 }, { "epoch": 13.1088295687885, "grad_norm": 5.9482221603393555, "learning_rate": 3.441095890410959e-07, "log_odds_chosen": 3.4315199851989746, "log_odds_ratio": -0.5350777506828308, "logits/chosen": 1.0717403888702393, "logits/rejected": 1.0706685781478882, "logps/chosen": -2.33573055267334, "logps/rejected": -5.56905460357666, "loss": 0.6211, "nll_loss": 0.567571222782135, "rewards/accuracies": 0.875, "rewards/chosen": -0.2335730791091919, "rewards/margins": 0.32333239912986755, "rewards/rejected": -0.5569054484367371, "step": 4788 }, { "epoch": 13.111567419575634, "grad_norm": 9.98059368133545, "learning_rate": 3.43972602739726e-07, "log_odds_chosen": 0.8462984561920166, "log_odds_ratio": -0.8724716901779175, "logits/chosen": 1.303409218788147, "logits/rejected": 1.3194098472595215, "logps/chosen": -3.273017406463623, "logps/rejected": -4.004913806915283, "loss": 0.6636, "nll_loss": 0.5763031840324402, "rewards/accuracies": 0.75, "rewards/chosen": -0.3273017406463623, "rewards/margins": 0.07318967580795288, "rewards/rejected": -0.4004913866519928, "step": 4789 }, { "epoch": 13.114305270362765, "grad_norm": 3.7043662071228027, "learning_rate": 3.4383561643835614e-07, "log_odds_chosen": 2.7245094776153564, "log_odds_ratio": -0.14876727759838104, "logits/chosen": 0.9252951741218567, "logits/rejected": 0.8058198690414429, "logps/chosen": -1.0519157648086548, "logps/rejected": -3.246635913848877, "loss": 0.4419, "nll_loss": 0.42699944972991943, "rewards/accuracies": 1.0, "rewards/chosen": -0.10519157350063324, "rewards/margins": 0.2194720059633255, "rewards/rejected": -0.32466357946395874, "step": 4790 }, { "epoch": 13.117043121149898, "grad_norm": 6.618142127990723, "learning_rate": 3.436986301369863e-07, "log_odds_chosen": 2.0099499225616455, "log_odds_ratio": -0.7673449516296387, "logits/chosen": 1.0701367855072021, "logits/rejected": 1.0621063709259033, "logps/chosen": -2.677962303161621, "logps/rejected": -4.459181785583496, "loss": 0.637, "nll_loss": 0.5602230429649353, "rewards/accuracies": 0.875, "rewards/chosen": -0.26779624819755554, "rewards/margins": 0.17812198400497437, "rewards/rejected": -0.4459182024002075, "step": 4791 }, { "epoch": 13.119780971937029, "grad_norm": 3.432049036026001, "learning_rate": 3.4356164383561644e-07, "log_odds_chosen": 1.8130078315734863, "log_odds_ratio": -0.4579103887081146, "logits/chosen": 1.0800144672393799, "logits/rejected": 1.0693840980529785, "logps/chosen": -1.925417184829712, "logps/rejected": -3.6421542167663574, "loss": 0.5213, "nll_loss": 0.4755535423755646, "rewards/accuracies": 0.75, "rewards/chosen": -0.19254170358181, "rewards/margins": 0.17167368531227112, "rewards/rejected": -0.3642154335975647, "step": 4792 }, { "epoch": 13.122518822724162, "grad_norm": 3.790647268295288, "learning_rate": 3.4342465753424654e-07, "log_odds_chosen": 2.729614496231079, "log_odds_ratio": -0.2946409583091736, "logits/chosen": 1.012314796447754, "logits/rejected": 0.9693136215209961, "logps/chosen": -1.9332386255264282, "logps/rejected": -4.419436454772949, "loss": 0.541, "nll_loss": 0.511542558670044, "rewards/accuracies": 0.875, "rewards/chosen": -0.19332386553287506, "rewards/margins": 0.24861976504325867, "rewards/rejected": -0.4419436454772949, "step": 4793 }, { "epoch": 13.125256673511293, "grad_norm": 10.965855598449707, "learning_rate": 3.432876712328767e-07, "log_odds_chosen": 3.2217788696289062, "log_odds_ratio": -0.5842266082763672, "logits/chosen": 1.3016357421875, "logits/rejected": 1.2989234924316406, "logps/chosen": -2.3440332412719727, "logps/rejected": -5.296158313751221, "loss": 0.5905, "nll_loss": 0.5320659875869751, "rewards/accuracies": 0.875, "rewards/chosen": -0.2344033271074295, "rewards/margins": 0.2952125370502472, "rewards/rejected": -0.5296158790588379, "step": 4794 }, { "epoch": 13.127994524298426, "grad_norm": 5.878852844238281, "learning_rate": 3.4315068493150685e-07, "log_odds_chosen": 0.9601131677627563, "log_odds_ratio": -0.42588719725608826, "logits/chosen": 1.1211841106414795, "logits/rejected": 1.0254899263381958, "logps/chosen": -2.2824926376342773, "logps/rejected": -3.120802879333496, "loss": 0.6086, "nll_loss": 0.5660585761070251, "rewards/accuracies": 0.875, "rewards/chosen": -0.22824925184249878, "rewards/margins": 0.08383102715015411, "rewards/rejected": -0.3120803236961365, "step": 4795 }, { "epoch": 13.130732375085557, "grad_norm": 3.4217369556427, "learning_rate": 3.4301369863013695e-07, "log_odds_chosen": 3.3327322006225586, "log_odds_ratio": -0.15429416298866272, "logits/chosen": 0.9211224913597107, "logits/rejected": 0.8331252932548523, "logps/chosen": -1.9968955516815186, "logps/rejected": -5.176521301269531, "loss": 0.5045, "nll_loss": 0.4890363812446594, "rewards/accuracies": 0.875, "rewards/chosen": -0.1996895670890808, "rewards/margins": 0.3179625868797302, "rewards/rejected": -0.517652153968811, "step": 4796 }, { "epoch": 13.13347022587269, "grad_norm": 3.9991912841796875, "learning_rate": 3.428767123287671e-07, "log_odds_chosen": 2.008883476257324, "log_odds_ratio": -0.30935993790626526, "logits/chosen": 1.280975103378296, "logits/rejected": 1.344658374786377, "logps/chosen": -2.322584629058838, "logps/rejected": -4.217136383056641, "loss": 0.6264, "nll_loss": 0.5955030918121338, "rewards/accuracies": 0.875, "rewards/chosen": -0.23225846886634827, "rewards/margins": 0.18945516645908356, "rewards/rejected": -0.42171362042427063, "step": 4797 }, { "epoch": 13.136208076659821, "grad_norm": 3.270777463912964, "learning_rate": 3.4273972602739725e-07, "log_odds_chosen": 4.461112022399902, "log_odds_ratio": -0.12672603130340576, "logits/chosen": 1.1909525394439697, "logits/rejected": 1.2378454208374023, "logps/chosen": -1.5766386985778809, "logps/rejected": -5.751384258270264, "loss": 0.5264, "nll_loss": 0.5137702226638794, "rewards/accuracies": 1.0, "rewards/chosen": -0.15766388177871704, "rewards/margins": 0.41747450828552246, "rewards/rejected": -0.5751383900642395, "step": 4798 }, { "epoch": 13.138945927446954, "grad_norm": 4.024716377258301, "learning_rate": 3.426027397260274e-07, "log_odds_chosen": 1.9809986352920532, "log_odds_ratio": -0.3760780692100525, "logits/chosen": 1.0626040697097778, "logits/rejected": 1.0807969570159912, "logps/chosen": -1.9723703861236572, "logps/rejected": -3.8535995483398438, "loss": 0.5351, "nll_loss": 0.4975214898586273, "rewards/accuracies": 0.75, "rewards/chosen": -0.1972370445728302, "rewards/margins": 0.18812289834022522, "rewards/rejected": -0.3853599429130554, "step": 4799 }, { "epoch": 13.141683778234086, "grad_norm": 3.6428792476654053, "learning_rate": 3.424657534246575e-07, "log_odds_chosen": 2.3501193523406982, "log_odds_ratio": -0.1668131798505783, "logits/chosen": 1.1764262914657593, "logits/rejected": 1.216752052307129, "logps/chosen": -1.3450813293457031, "logps/rejected": -3.440516948699951, "loss": 0.4683, "nll_loss": 0.4516388773918152, "rewards/accuracies": 1.0, "rewards/chosen": -0.1345081329345703, "rewards/margins": 0.20954355597496033, "rewards/rejected": -0.34405165910720825, "step": 4800 }, { "epoch": 13.144421629021219, "grad_norm": 2.9177725315093994, "learning_rate": 3.4232876712328765e-07, "log_odds_chosen": 2.325253963470459, "log_odds_ratio": -0.15756762027740479, "logits/chosen": 1.0796366930007935, "logits/rejected": 1.0421929359436035, "logps/chosen": -1.4942405223846436, "logps/rejected": -3.5748472213745117, "loss": 0.4713, "nll_loss": 0.45558714866638184, "rewards/accuracies": 1.0, "rewards/chosen": -0.14942404627799988, "rewards/margins": 0.20806068181991577, "rewards/rejected": -0.35748475790023804, "step": 4801 }, { "epoch": 13.14715947980835, "grad_norm": 3.9695799350738525, "learning_rate": 3.421917808219178e-07, "log_odds_chosen": 2.0804007053375244, "log_odds_ratio": -0.20800898969173431, "logits/chosen": 1.1444257497787476, "logits/rejected": 1.1054211854934692, "logps/chosen": -1.4994460344314575, "logps/rejected": -3.357815742492676, "loss": 0.4774, "nll_loss": 0.45656251907348633, "rewards/accuracies": 1.0, "rewards/chosen": -0.14994460344314575, "rewards/margins": 0.18583698570728302, "rewards/rejected": -0.3357815742492676, "step": 4802 }, { "epoch": 13.149897330595483, "grad_norm": 3.4515187740325928, "learning_rate": 3.420547945205479e-07, "log_odds_chosen": 2.3134944438934326, "log_odds_ratio": -0.3033848702907562, "logits/chosen": 1.09279203414917, "logits/rejected": 1.1408793926239014, "logps/chosen": -1.7174177169799805, "logps/rejected": -3.9380154609680176, "loss": 0.52, "nll_loss": 0.48966437578201294, "rewards/accuracies": 0.875, "rewards/chosen": -0.1717417687177658, "rewards/margins": 0.22205975651741028, "rewards/rejected": -0.39380156993865967, "step": 4803 }, { "epoch": 13.152635181382614, "grad_norm": 3.3338513374328613, "learning_rate": 3.419178082191781e-07, "log_odds_chosen": 3.5530598163604736, "log_odds_ratio": -0.10786754637956619, "logits/chosen": 1.1241159439086914, "logits/rejected": 1.0351169109344482, "logps/chosen": -1.7900317907333374, "logps/rejected": -5.151369094848633, "loss": 0.6831, "nll_loss": 0.6723374128341675, "rewards/accuracies": 1.0, "rewards/chosen": -0.17900316417217255, "rewards/margins": 0.33613377809524536, "rewards/rejected": -0.5151368975639343, "step": 4804 }, { "epoch": 13.155373032169747, "grad_norm": 3.9097201824188232, "learning_rate": 3.417808219178082e-07, "log_odds_chosen": 4.063755989074707, "log_odds_ratio": -0.23287703096866608, "logits/chosen": 1.2667235136032104, "logits/rejected": 1.2961840629577637, "logps/chosen": -2.406309127807617, "logps/rejected": -6.343274116516113, "loss": 0.556, "nll_loss": 0.5326663851737976, "rewards/accuracies": 1.0, "rewards/chosen": -0.24063090980052948, "rewards/margins": 0.39369648694992065, "rewards/rejected": -0.6343274116516113, "step": 4805 }, { "epoch": 13.158110882956878, "grad_norm": 4.406227111816406, "learning_rate": 3.4164383561643836e-07, "log_odds_chosen": 1.4718263149261475, "log_odds_ratio": -0.3392292857170105, "logits/chosen": 1.1799992322921753, "logits/rejected": 1.2198001146316528, "logps/chosen": -2.3213090896606445, "logps/rejected": -3.6958324909210205, "loss": 0.5035, "nll_loss": 0.46957463026046753, "rewards/accuracies": 0.875, "rewards/chosen": -0.23213091492652893, "rewards/margins": 0.13745233416557312, "rewards/rejected": -0.36958324909210205, "step": 4806 }, { "epoch": 13.160848733744011, "grad_norm": 3.2118494510650635, "learning_rate": 3.4150684931506846e-07, "log_odds_chosen": 3.354853630065918, "log_odds_ratio": -0.14401859045028687, "logits/chosen": 0.7410134077072144, "logits/rejected": 0.7094199657440186, "logps/chosen": -1.6248903274536133, "logps/rejected": -4.75929594039917, "loss": 0.4537, "nll_loss": 0.43934038281440735, "rewards/accuracies": 1.0, "rewards/chosen": -0.16248902678489685, "rewards/margins": 0.31344059109687805, "rewards/rejected": -0.4759296178817749, "step": 4807 }, { "epoch": 13.163586584531142, "grad_norm": 4.241784572601318, "learning_rate": 3.413698630136986e-07, "log_odds_chosen": 0.4126221537590027, "log_odds_ratio": -0.529789924621582, "logits/chosen": 1.2230044603347778, "logits/rejected": 1.1476720571517944, "logps/chosen": -1.4695740938186646, "logps/rejected": -1.8004157543182373, "loss": 0.6296, "nll_loss": 0.5766232013702393, "rewards/accuracies": 0.875, "rewards/chosen": -0.1469573974609375, "rewards/margins": 0.03308417648077011, "rewards/rejected": -0.18004156649112701, "step": 4808 }, { "epoch": 13.166324435318275, "grad_norm": 3.42505145072937, "learning_rate": 3.4123287671232876e-07, "log_odds_chosen": 1.84018075466156, "log_odds_ratio": -0.20176240801811218, "logits/chosen": 1.2210400104522705, "logits/rejected": 1.2372593879699707, "logps/chosen": -1.9384534358978271, "logps/rejected": -3.5963282585144043, "loss": 0.5552, "nll_loss": 0.5350329875946045, "rewards/accuracies": 1.0, "rewards/chosen": -0.19384533166885376, "rewards/margins": 0.165787473320961, "rewards/rejected": -0.35963279008865356, "step": 4809 }, { "epoch": 13.169062286105408, "grad_norm": 3.6062655448913574, "learning_rate": 3.4109589041095886e-07, "log_odds_chosen": 2.3895201683044434, "log_odds_ratio": -0.2759351134300232, "logits/chosen": 1.0044149160385132, "logits/rejected": 0.9412257671356201, "logps/chosen": -1.4483158588409424, "logps/rejected": -3.6529886722564697, "loss": 0.4439, "nll_loss": 0.41625893115997314, "rewards/accuracies": 0.875, "rewards/chosen": -0.144831582903862, "rewards/margins": 0.22046729922294617, "rewards/rejected": -0.36529889702796936, "step": 4810 }, { "epoch": 13.17180013689254, "grad_norm": 7.309447288513184, "learning_rate": 3.4095890410958907e-07, "log_odds_chosen": 3.1445674896240234, "log_odds_ratio": -0.2263718545436859, "logits/chosen": 1.0480339527130127, "logits/rejected": 1.0165297985076904, "logps/chosen": -2.2432260513305664, "logps/rejected": -5.221527576446533, "loss": 0.6381, "nll_loss": 0.6155083775520325, "rewards/accuracies": 1.0, "rewards/chosen": -0.2243226170539856, "rewards/margins": 0.29783016443252563, "rewards/rejected": -0.5221527814865112, "step": 4811 }, { "epoch": 13.174537987679672, "grad_norm": 3.743067979812622, "learning_rate": 3.4082191780821917e-07, "log_odds_chosen": 3.492110252380371, "log_odds_ratio": -0.21114201843738556, "logits/chosen": 0.9488371014595032, "logits/rejected": 0.886738121509552, "logps/chosen": -1.2300631999969482, "logps/rejected": -4.4659199714660645, "loss": 0.5282, "nll_loss": 0.5070716142654419, "rewards/accuracies": 1.0, "rewards/chosen": -0.12300632894039154, "rewards/margins": 0.3235856592655182, "rewards/rejected": -0.4465920031070709, "step": 4812 }, { "epoch": 13.177275838466803, "grad_norm": 4.6464104652404785, "learning_rate": 3.406849315068493e-07, "log_odds_chosen": 2.4899911880493164, "log_odds_ratio": -0.1746930181980133, "logits/chosen": 1.1079075336456299, "logits/rejected": 1.0858607292175293, "logps/chosen": -2.1394007205963135, "logps/rejected": -4.4707441329956055, "loss": 0.6567, "nll_loss": 0.6392278671264648, "rewards/accuracies": 1.0, "rewards/chosen": -0.2139400690793991, "rewards/margins": 0.23313435912132263, "rewards/rejected": -0.44707441329956055, "step": 4813 }, { "epoch": 13.180013689253936, "grad_norm": 3.7871618270874023, "learning_rate": 3.405479452054794e-07, "log_odds_chosen": 1.1312618255615234, "log_odds_ratio": -0.43398627638816833, "logits/chosen": 1.2336093187332153, "logits/rejected": 1.2209258079528809, "logps/chosen": -1.6407934427261353, "logps/rejected": -2.675149440765381, "loss": 0.5438, "nll_loss": 0.5004265904426575, "rewards/accuracies": 0.75, "rewards/chosen": -0.16407933831214905, "rewards/margins": 0.10343560576438904, "rewards/rejected": -0.2675149440765381, "step": 4814 }, { "epoch": 13.182751540041068, "grad_norm": 3.9627439975738525, "learning_rate": 3.4041095890410957e-07, "log_odds_chosen": 0.9267148971557617, "log_odds_ratio": -0.39275193214416504, "logits/chosen": 1.0851807594299316, "logits/rejected": 0.9618436694145203, "logps/chosen": -1.7665560245513916, "logps/rejected": -2.5687294006347656, "loss": 0.5421, "nll_loss": 0.5027796626091003, "rewards/accuracies": 0.875, "rewards/chosen": -0.1766556054353714, "rewards/margins": 0.08021734654903412, "rewards/rejected": -0.2568729519844055, "step": 4815 }, { "epoch": 13.1854893908282, "grad_norm": 3.2372817993164062, "learning_rate": 3.402739726027397e-07, "log_odds_chosen": 4.17367696762085, "log_odds_ratio": -0.08164320886135101, "logits/chosen": 1.1104450225830078, "logits/rejected": 1.1620668172836304, "logps/chosen": -1.164825439453125, "logps/rejected": -4.927585124969482, "loss": 0.4725, "nll_loss": 0.46435773372650146, "rewards/accuracies": 1.0, "rewards/chosen": -0.11648255586624146, "rewards/margins": 0.3762759864330292, "rewards/rejected": -0.49275851249694824, "step": 4816 }, { "epoch": 13.188227241615332, "grad_norm": 4.687083721160889, "learning_rate": 3.401369863013698e-07, "log_odds_chosen": 2.420374631881714, "log_odds_ratio": -0.1896205097436905, "logits/chosen": 0.9341335296630859, "logits/rejected": 0.9404539465904236, "logps/chosen": -2.0162124633789062, "logps/rejected": -4.223735809326172, "loss": 0.6257, "nll_loss": 0.6067843437194824, "rewards/accuracies": 0.875, "rewards/chosen": -0.20162123441696167, "rewards/margins": 0.22075232863426208, "rewards/rejected": -0.42237359285354614, "step": 4817 }, { "epoch": 13.190965092402465, "grad_norm": 3.899716854095459, "learning_rate": 3.4000000000000003e-07, "log_odds_chosen": 4.866442680358887, "log_odds_ratio": -0.19433337450027466, "logits/chosen": 0.9374884366989136, "logits/rejected": 0.9493095874786377, "logps/chosen": -1.6081725358963013, "logps/rejected": -6.269252777099609, "loss": 0.576, "nll_loss": 0.5565729141235352, "rewards/accuracies": 1.0, "rewards/chosen": -0.1608172506093979, "rewards/margins": 0.4661080837249756, "rewards/rejected": -0.6269252896308899, "step": 4818 }, { "epoch": 13.193702943189596, "grad_norm": 3.7339513301849365, "learning_rate": 3.398630136986301e-07, "log_odds_chosen": 6.800175189971924, "log_odds_ratio": -0.1037876158952713, "logits/chosen": 1.1006295680999756, "logits/rejected": 1.151431679725647, "logps/chosen": -2.7975566387176514, "logps/rejected": -9.291633605957031, "loss": 0.8207, "nll_loss": 0.8103642463684082, "rewards/accuracies": 0.875, "rewards/chosen": -0.27975571155548096, "rewards/margins": 0.6494077444076538, "rewards/rejected": -0.9291634559631348, "step": 4819 }, { "epoch": 13.196440793976729, "grad_norm": 7.753359794616699, "learning_rate": 3.397260273972602e-07, "log_odds_chosen": 2.6242270469665527, "log_odds_ratio": -0.6681799292564392, "logits/chosen": 1.1183315515518188, "logits/rejected": 1.1457706689834595, "logps/chosen": -2.14823317527771, "logps/rejected": -4.615187644958496, "loss": 0.6452, "nll_loss": 0.5783934593200684, "rewards/accuracies": 0.75, "rewards/chosen": -0.21482332050800323, "rewards/margins": 0.24669545888900757, "rewards/rejected": -0.461518794298172, "step": 4820 }, { "epoch": 13.19917864476386, "grad_norm": 3.907501697540283, "learning_rate": 3.395890410958904e-07, "log_odds_chosen": 4.90150785446167, "log_odds_ratio": -0.028156902641057968, "logits/chosen": 1.2967147827148438, "logits/rejected": 1.319905161857605, "logps/chosen": -1.6512120962142944, "logps/rejected": -6.319787502288818, "loss": 0.5213, "nll_loss": 0.5184649229049683, "rewards/accuracies": 1.0, "rewards/chosen": -0.16512122750282288, "rewards/margins": 0.46685752272605896, "rewards/rejected": -0.6319787502288818, "step": 4821 }, { "epoch": 13.201916495550993, "grad_norm": 4.569025039672852, "learning_rate": 3.3945205479452053e-07, "log_odds_chosen": 3.2628159523010254, "log_odds_ratio": -0.43736132979393005, "logits/chosen": 1.183688759803772, "logits/rejected": 1.2640938758850098, "logps/chosen": -2.3398325443267822, "logps/rejected": -5.469343185424805, "loss": 0.6325, "nll_loss": 0.5888020992279053, "rewards/accuracies": 0.875, "rewards/chosen": -0.23398324847221375, "rewards/margins": 0.31295114755630493, "rewards/rejected": -0.5469343662261963, "step": 4822 }, { "epoch": 13.204654346338124, "grad_norm": 4.330228805541992, "learning_rate": 3.393150684931507e-07, "log_odds_chosen": 2.017242670059204, "log_odds_ratio": -0.3288801908493042, "logits/chosen": 1.2382361888885498, "logits/rejected": 1.1654099225997925, "logps/chosen": -2.3942887783050537, "logps/rejected": -4.29502534866333, "loss": 0.6069, "nll_loss": 0.5739791989326477, "rewards/accuracies": 0.875, "rewards/chosen": -0.23942886292934418, "rewards/margins": 0.19007368385791779, "rewards/rejected": -0.42950254678726196, "step": 4823 }, { "epoch": 13.207392197125257, "grad_norm": 3.340791940689087, "learning_rate": 3.391780821917808e-07, "log_odds_chosen": 3.154268741607666, "log_odds_ratio": -0.12914058566093445, "logits/chosen": 1.094848394393921, "logits/rejected": 0.9992321133613586, "logps/chosen": -1.3923749923706055, "logps/rejected": -4.250648498535156, "loss": 0.5212, "nll_loss": 0.508237361907959, "rewards/accuracies": 1.0, "rewards/chosen": -0.13923749327659607, "rewards/margins": 0.2858273386955261, "rewards/rejected": -0.4250648319721222, "step": 4824 }, { "epoch": 13.210130047912388, "grad_norm": 3.859459638595581, "learning_rate": 3.39041095890411e-07, "log_odds_chosen": 2.592900276184082, "log_odds_ratio": -0.13507550954818726, "logits/chosen": 1.0142316818237305, "logits/rejected": 1.0595368146896362, "logps/chosen": -1.6886253356933594, "logps/rejected": -4.083004951477051, "loss": 0.5132, "nll_loss": 0.49972495436668396, "rewards/accuracies": 1.0, "rewards/chosen": -0.16886255145072937, "rewards/margins": 0.23943795263767242, "rewards/rejected": -0.4083004891872406, "step": 4825 }, { "epoch": 13.212867898699521, "grad_norm": 4.186819076538086, "learning_rate": 3.389041095890411e-07, "log_odds_chosen": 4.299935340881348, "log_odds_ratio": -0.07887544482946396, "logits/chosen": 0.9572821855545044, "logits/rejected": 0.9556089639663696, "logps/chosen": -1.1526436805725098, "logps/rejected": -5.0841498374938965, "loss": 0.3906, "nll_loss": 0.3827323019504547, "rewards/accuracies": 1.0, "rewards/chosen": -0.11526436358690262, "rewards/margins": 0.3931506276130676, "rewards/rejected": -0.5084149837493896, "step": 4826 }, { "epoch": 13.215605749486652, "grad_norm": 3.607956647872925, "learning_rate": 3.387671232876712e-07, "log_odds_chosen": 3.8354363441467285, "log_odds_ratio": -0.13554441928863525, "logits/chosen": 1.278181552886963, "logits/rejected": 1.2954665422439575, "logps/chosen": -1.6485276222229004, "logps/rejected": -5.1682000160217285, "loss": 0.5264, "nll_loss": 0.5128294825553894, "rewards/accuracies": 1.0, "rewards/chosen": -0.1648527830839157, "rewards/margins": 0.3519672155380249, "rewards/rejected": -0.5168200135231018, "step": 4827 }, { "epoch": 13.218343600273785, "grad_norm": 3.221078634262085, "learning_rate": 3.3863013698630134e-07, "log_odds_chosen": 2.879089832305908, "log_odds_ratio": -0.15339075028896332, "logits/chosen": 1.2793700695037842, "logits/rejected": 1.3423004150390625, "logps/chosen": -2.1850626468658447, "logps/rejected": -4.948503494262695, "loss": 0.6262, "nll_loss": 0.6108579635620117, "rewards/accuracies": 1.0, "rewards/chosen": -0.21850627660751343, "rewards/margins": 0.27634406089782715, "rewards/rejected": -0.4948503375053406, "step": 4828 }, { "epoch": 13.221081451060916, "grad_norm": 4.344176769256592, "learning_rate": 3.384931506849315e-07, "log_odds_chosen": 2.1901371479034424, "log_odds_ratio": -0.3780860900878906, "logits/chosen": 1.039870023727417, "logits/rejected": 0.9080870151519775, "logps/chosen": -1.865877628326416, "logps/rejected": -3.967418670654297, "loss": 0.561, "nll_loss": 0.5231484770774841, "rewards/accuracies": 0.75, "rewards/chosen": -0.18658776581287384, "rewards/margins": 0.21015410125255585, "rewards/rejected": -0.3967418670654297, "step": 4829 }, { "epoch": 13.22381930184805, "grad_norm": 3.753971576690674, "learning_rate": 3.3835616438356164e-07, "log_odds_chosen": 2.5379889011383057, "log_odds_ratio": -0.2041040062904358, "logits/chosen": 0.9162701368331909, "logits/rejected": 0.7689844965934753, "logps/chosen": -1.8588812351226807, "logps/rejected": -4.188295364379883, "loss": 0.4826, "nll_loss": 0.46220386028289795, "rewards/accuracies": 1.0, "rewards/chosen": -0.1858881413936615, "rewards/margins": 0.23294144868850708, "rewards/rejected": -0.4188295900821686, "step": 4830 }, { "epoch": 13.22655715263518, "grad_norm": 4.984622001647949, "learning_rate": 3.3821917808219174e-07, "log_odds_chosen": 0.6972769498825073, "log_odds_ratio": -0.4266304075717926, "logits/chosen": 1.0559382438659668, "logits/rejected": 0.9811835289001465, "logps/chosen": -2.236428737640381, "logps/rejected": -2.8279523849487305, "loss": 0.5996, "nll_loss": 0.5569254159927368, "rewards/accuracies": 0.875, "rewards/chosen": -0.22364288568496704, "rewards/margins": 0.05915234982967377, "rewards/rejected": -0.282795250415802, "step": 4831 }, { "epoch": 13.229295003422314, "grad_norm": 6.712510585784912, "learning_rate": 3.3808219178082194e-07, "log_odds_chosen": 0.42788779735565186, "log_odds_ratio": -0.6691404581069946, "logits/chosen": 1.0814695358276367, "logits/rejected": 1.1296391487121582, "logps/chosen": -2.4934701919555664, "logps/rejected": -2.869141101837158, "loss": 0.5534, "nll_loss": 0.4865211844444275, "rewards/accuracies": 0.75, "rewards/chosen": -0.2493470013141632, "rewards/margins": 0.03756709396839142, "rewards/rejected": -0.2869141101837158, "step": 4832 }, { "epoch": 13.232032854209445, "grad_norm": 3.3963756561279297, "learning_rate": 3.3794520547945204e-07, "log_odds_chosen": 4.172022342681885, "log_odds_ratio": -0.1299864798784256, "logits/chosen": 1.3661766052246094, "logits/rejected": 1.401646375656128, "logps/chosen": -1.8428080081939697, "logps/rejected": -5.785871505737305, "loss": 0.5431, "nll_loss": 0.5300867557525635, "rewards/accuracies": 1.0, "rewards/chosen": -0.18428078293800354, "rewards/margins": 0.39430636167526245, "rewards/rejected": -0.5785871744155884, "step": 4833 }, { "epoch": 13.234770704996578, "grad_norm": 4.221595287322998, "learning_rate": 3.3780821917808214e-07, "log_odds_chosen": 3.8753864765167236, "log_odds_ratio": -0.27330100536346436, "logits/chosen": 0.7703813910484314, "logits/rejected": 0.6936252117156982, "logps/chosen": -1.9324548244476318, "logps/rejected": -5.684624671936035, "loss": 0.6071, "nll_loss": 0.5797674655914307, "rewards/accuracies": 1.0, "rewards/chosen": -0.19324547052383423, "rewards/margins": 0.3752169609069824, "rewards/rejected": -0.5684623718261719, "step": 4834 }, { "epoch": 13.23750855578371, "grad_norm": 3.710221529006958, "learning_rate": 3.376712328767123e-07, "log_odds_chosen": 2.1050472259521484, "log_odds_ratio": -0.3318202495574951, "logits/chosen": 0.6977400779724121, "logits/rejected": 0.6409099698066711, "logps/chosen": -2.165031909942627, "logps/rejected": -4.1293044090271, "loss": 0.5666, "nll_loss": 0.533379852771759, "rewards/accuracies": 0.875, "rewards/chosen": -0.21650318801403046, "rewards/margins": 0.19642727077007294, "rewards/rejected": -0.412930428981781, "step": 4835 }, { "epoch": 13.240246406570842, "grad_norm": 5.86970329284668, "learning_rate": 3.3753424657534245e-07, "log_odds_chosen": 3.7664742469787598, "log_odds_ratio": -0.19184482097625732, "logits/chosen": 1.2191588878631592, "logits/rejected": 1.2115166187286377, "logps/chosen": -3.405989408493042, "logps/rejected": -7.064487934112549, "loss": 0.7992, "nll_loss": 0.779979407787323, "rewards/accuracies": 0.875, "rewards/chosen": -0.3405989408493042, "rewards/margins": 0.36584991216659546, "rewards/rejected": -0.7064487934112549, "step": 4836 }, { "epoch": 13.242984257357975, "grad_norm": 3.6182851791381836, "learning_rate": 3.373972602739726e-07, "log_odds_chosen": 2.9788379669189453, "log_odds_ratio": -0.17130926251411438, "logits/chosen": 1.1185054779052734, "logits/rejected": 1.1321138143539429, "logps/chosen": -1.5985922813415527, "logps/rejected": -4.331095218658447, "loss": 0.5536, "nll_loss": 0.5364320874214172, "rewards/accuracies": 1.0, "rewards/chosen": -0.15985924005508423, "rewards/margins": 0.2732503116130829, "rewards/rejected": -0.4331095814704895, "step": 4837 }, { "epoch": 13.245722108145106, "grad_norm": 4.810439109802246, "learning_rate": 3.372602739726027e-07, "log_odds_chosen": 3.0677947998046875, "log_odds_ratio": -0.2196250557899475, "logits/chosen": 1.1745140552520752, "logits/rejected": 1.1707897186279297, "logps/chosen": -2.6584630012512207, "logps/rejected": -5.612290382385254, "loss": 0.6762, "nll_loss": 0.6541894674301147, "rewards/accuracies": 1.0, "rewards/chosen": -0.265846312046051, "rewards/margins": 0.2953827381134033, "rewards/rejected": -0.5612290501594543, "step": 4838 }, { "epoch": 13.248459958932239, "grad_norm": 5.656007766723633, "learning_rate": 3.371232876712329e-07, "log_odds_chosen": 3.874321937561035, "log_odds_ratio": -0.35839027166366577, "logits/chosen": 1.3746259212493896, "logits/rejected": 1.3685269355773926, "logps/chosen": -2.612743377685547, "logps/rejected": -6.377211570739746, "loss": 0.6225, "nll_loss": 0.5866915583610535, "rewards/accuracies": 0.75, "rewards/chosen": -0.2612743377685547, "rewards/margins": 0.37644678354263306, "rewards/rejected": -0.637721061706543, "step": 4839 }, { "epoch": 13.25119780971937, "grad_norm": 3.85623836517334, "learning_rate": 3.36986301369863e-07, "log_odds_chosen": 2.7465786933898926, "log_odds_ratio": -0.1002037450671196, "logits/chosen": 1.297972321510315, "logits/rejected": 1.376420259475708, "logps/chosen": -1.779494047164917, "logps/rejected": -4.272306442260742, "loss": 0.522, "nll_loss": 0.5119432806968689, "rewards/accuracies": 1.0, "rewards/chosen": -0.17794939875602722, "rewards/margins": 0.24928124248981476, "rewards/rejected": -0.4272306263446808, "step": 4840 }, { "epoch": 13.253935660506503, "grad_norm": 5.083042144775391, "learning_rate": 3.368493150684931e-07, "log_odds_chosen": 3.386262893676758, "log_odds_ratio": -0.1859026551246643, "logits/chosen": 0.8912407159805298, "logits/rejected": 0.8678977489471436, "logps/chosen": -2.2646522521972656, "logps/rejected": -5.482272624969482, "loss": 0.5831, "nll_loss": 0.5645418763160706, "rewards/accuracies": 1.0, "rewards/chosen": -0.22646522521972656, "rewards/margins": 0.3217620551586151, "rewards/rejected": -0.5482272505760193, "step": 4841 }, { "epoch": 13.256673511293634, "grad_norm": 3.622171640396118, "learning_rate": 3.367123287671233e-07, "log_odds_chosen": 1.49403977394104, "log_odds_ratio": -0.29357194900512695, "logits/chosen": 1.4397023916244507, "logits/rejected": 1.408447265625, "logps/chosen": -1.7797214984893799, "logps/rejected": -3.1225833892822266, "loss": 0.5484, "nll_loss": 0.5190597176551819, "rewards/accuracies": 1.0, "rewards/chosen": -0.17797215282917023, "rewards/margins": 0.13428616523742676, "rewards/rejected": -0.3122583329677582, "step": 4842 }, { "epoch": 13.259411362080767, "grad_norm": 3.4025862216949463, "learning_rate": 3.365753424657534e-07, "log_odds_chosen": 3.230788469314575, "log_odds_ratio": -0.15095189213752747, "logits/chosen": 0.9678751230239868, "logits/rejected": 0.9992021322250366, "logps/chosen": -2.1732659339904785, "logps/rejected": -5.300246238708496, "loss": 0.7005, "nll_loss": 0.6854304075241089, "rewards/accuracies": 1.0, "rewards/chosen": -0.2173265814781189, "rewards/margins": 0.3126980662345886, "rewards/rejected": -0.5300246477127075, "step": 4843 }, { "epoch": 13.262149212867898, "grad_norm": 3.7759764194488525, "learning_rate": 3.3643835616438356e-07, "log_odds_chosen": 4.20732307434082, "log_odds_ratio": -0.104193776845932, "logits/chosen": 1.1000515222549438, "logits/rejected": 1.032679796218872, "logps/chosen": -1.3634172677993774, "logps/rejected": -5.228108882904053, "loss": 0.6228, "nll_loss": 0.6123536229133606, "rewards/accuracies": 1.0, "rewards/chosen": -0.13634172081947327, "rewards/margins": 0.38646918535232544, "rewards/rejected": -0.5228109359741211, "step": 4844 }, { "epoch": 13.264887063655031, "grad_norm": 3.1376290321350098, "learning_rate": 3.3630136986301366e-07, "log_odds_chosen": 3.8226890563964844, "log_odds_ratio": -0.10037799924612045, "logits/chosen": 1.1377590894699097, "logits/rejected": 1.0904229879379272, "logps/chosen": -1.4680627584457397, "logps/rejected": -4.928702354431152, "loss": 0.5287, "nll_loss": 0.5186182260513306, "rewards/accuracies": 1.0, "rewards/chosen": -0.1468062698841095, "rewards/margins": 0.34606391191482544, "rewards/rejected": -0.4928702414035797, "step": 4845 }, { "epoch": 13.267624914442163, "grad_norm": 9.24936580657959, "learning_rate": 3.3616438356164386e-07, "log_odds_chosen": 1.7331464290618896, "log_odds_ratio": -0.4778629243373871, "logits/chosen": 1.2787021398544312, "logits/rejected": 1.2825748920440674, "logps/chosen": -2.708508014678955, "logps/rejected": -4.299543857574463, "loss": 0.5867, "nll_loss": 0.5389029383659363, "rewards/accuracies": 0.75, "rewards/chosen": -0.2708507776260376, "rewards/margins": 0.1591036021709442, "rewards/rejected": -0.4299544095993042, "step": 4846 }, { "epoch": 13.270362765229295, "grad_norm": 3.6605148315429688, "learning_rate": 3.3602739726027396e-07, "log_odds_chosen": 2.518507957458496, "log_odds_ratio": -0.29197609424591064, "logits/chosen": 0.8740508556365967, "logits/rejected": 0.8360557556152344, "logps/chosen": -1.8631521463394165, "logps/rejected": -4.2653727531433105, "loss": 0.5819, "nll_loss": 0.5526679754257202, "rewards/accuracies": 0.875, "rewards/chosen": -0.18631520867347717, "rewards/margins": 0.24022206664085388, "rewards/rejected": -0.42653727531433105, "step": 4847 }, { "epoch": 13.273100616016427, "grad_norm": 3.4676010608673096, "learning_rate": 3.3589041095890406e-07, "log_odds_chosen": 4.305113315582275, "log_odds_ratio": -0.08857879787683487, "logits/chosen": 0.962848424911499, "logits/rejected": 0.9863814115524292, "logps/chosen": -1.846243143081665, "logps/rejected": -5.898477554321289, "loss": 0.6208, "nll_loss": 0.6118928790092468, "rewards/accuracies": 1.0, "rewards/chosen": -0.1846243143081665, "rewards/margins": 0.4052234888076782, "rewards/rejected": -0.5898478031158447, "step": 4848 }, { "epoch": 13.27583846680356, "grad_norm": 3.555903434753418, "learning_rate": 3.3575342465753426e-07, "log_odds_chosen": 2.4764907360076904, "log_odds_ratio": -0.21389640867710114, "logits/chosen": 1.3175891637802124, "logits/rejected": 1.2674843072891235, "logps/chosen": -1.8869030475616455, "logps/rejected": -4.187837600708008, "loss": 0.4696, "nll_loss": 0.4481775760650635, "rewards/accuracies": 1.0, "rewards/chosen": -0.18869030475616455, "rewards/margins": 0.23009343445301056, "rewards/rejected": -0.4187837541103363, "step": 4849 }, { "epoch": 13.27857631759069, "grad_norm": 3.7963712215423584, "learning_rate": 3.3561643835616436e-07, "log_odds_chosen": 1.9058024883270264, "log_odds_ratio": -0.2693621814250946, "logits/chosen": 0.8563852906227112, "logits/rejected": 0.8108287453651428, "logps/chosen": -1.6454726457595825, "logps/rejected": -3.4097096920013428, "loss": 0.4528, "nll_loss": 0.4258671700954437, "rewards/accuracies": 0.875, "rewards/chosen": -0.16454727947711945, "rewards/margins": 0.17642369866371155, "rewards/rejected": -0.3409709632396698, "step": 4850 }, { "epoch": 13.281314168377824, "grad_norm": 3.4990053176879883, "learning_rate": 3.354794520547945e-07, "log_odds_chosen": 2.9893558025360107, "log_odds_ratio": -0.31097057461738586, "logits/chosen": 0.8356773257255554, "logits/rejected": 0.8565056920051575, "logps/chosen": -2.088487148284912, "logps/rejected": -4.892602443695068, "loss": 0.5301, "nll_loss": 0.4989648163318634, "rewards/accuracies": 0.875, "rewards/chosen": -0.2088487148284912, "rewards/margins": 0.2804115414619446, "rewards/rejected": -0.4892602562904358, "step": 4851 }, { "epoch": 13.284052019164955, "grad_norm": 6.091332912445068, "learning_rate": 3.353424657534246e-07, "log_odds_chosen": 2.111334800720215, "log_odds_ratio": -0.5109017491340637, "logits/chosen": 1.1303049325942993, "logits/rejected": 1.1160227060317993, "logps/chosen": -2.0399351119995117, "logps/rejected": -3.990365982055664, "loss": 0.6807, "nll_loss": 0.6296448111534119, "rewards/accuracies": 0.75, "rewards/chosen": -0.2039935141801834, "rewards/margins": 0.19504308700561523, "rewards/rejected": -0.39903661608695984, "step": 4852 }, { "epoch": 13.286789869952088, "grad_norm": 4.591132164001465, "learning_rate": 3.352054794520548e-07, "log_odds_chosen": 3.0774950981140137, "log_odds_ratio": -0.18004269897937775, "logits/chosen": 1.2566416263580322, "logits/rejected": 1.3332931995391846, "logps/chosen": -2.6716480255126953, "logps/rejected": -5.660136699676514, "loss": 0.5521, "nll_loss": 0.5340878963470459, "rewards/accuracies": 1.0, "rewards/chosen": -0.26716479659080505, "rewards/margins": 0.29884886741638184, "rewards/rejected": -0.5660136938095093, "step": 4853 }, { "epoch": 13.289527720739219, "grad_norm": 6.571981906890869, "learning_rate": 3.350684931506849e-07, "log_odds_chosen": 3.9861934185028076, "log_odds_ratio": -0.31278669834136963, "logits/chosen": 1.2508331537246704, "logits/rejected": 1.3102036714553833, "logps/chosen": -2.1115338802337646, "logps/rejected": -5.870087623596191, "loss": 0.7053, "nll_loss": 0.6740115880966187, "rewards/accuracies": 0.875, "rewards/chosen": -0.21115338802337646, "rewards/margins": 0.375855416059494, "rewards/rejected": -0.5870088338851929, "step": 4854 }, { "epoch": 13.292265571526352, "grad_norm": 3.7438313961029053, "learning_rate": 3.34931506849315e-07, "log_odds_chosen": 3.1266162395477295, "log_odds_ratio": -0.24161764979362488, "logits/chosen": 0.9560638070106506, "logits/rejected": 0.9837678670883179, "logps/chosen": -2.193911075592041, "logps/rejected": -5.200738906860352, "loss": 0.5918, "nll_loss": 0.5676596164703369, "rewards/accuracies": 1.0, "rewards/chosen": -0.2193911224603653, "rewards/margins": 0.30068278312683105, "rewards/rejected": -0.5200738906860352, "step": 4855 }, { "epoch": 13.295003422313483, "grad_norm": 3.507042646408081, "learning_rate": 3.347945205479452e-07, "log_odds_chosen": 4.3477067947387695, "log_odds_ratio": -0.08116840571165085, "logits/chosen": 1.1259922981262207, "logits/rejected": 1.0868452787399292, "logps/chosen": -1.4905784130096436, "logps/rejected": -5.585353851318359, "loss": 0.5907, "nll_loss": 0.5825396180152893, "rewards/accuracies": 1.0, "rewards/chosen": -0.14905785024166107, "rewards/margins": 0.40947750210762024, "rewards/rejected": -0.5585353374481201, "step": 4856 }, { "epoch": 13.297741273100616, "grad_norm": 3.6278767585754395, "learning_rate": 3.346575342465753e-07, "log_odds_chosen": 5.220815658569336, "log_odds_ratio": -0.0187660101801157, "logits/chosen": 1.2311185598373413, "logits/rejected": 1.2750529050827026, "logps/chosen": -1.8804634809494019, "logps/rejected": -6.828343391418457, "loss": 0.724, "nll_loss": 0.7220916748046875, "rewards/accuracies": 1.0, "rewards/chosen": -0.18804633617401123, "rewards/margins": 0.4947879910469055, "rewards/rejected": -0.6828343272209167, "step": 4857 }, { "epoch": 13.300479123887747, "grad_norm": 4.331912040710449, "learning_rate": 3.345205479452055e-07, "log_odds_chosen": 3.198462963104248, "log_odds_ratio": -0.3454994857311249, "logits/chosen": 1.0580977201461792, "logits/rejected": 1.0309486389160156, "logps/chosen": -1.8141621351242065, "logps/rejected": -4.816640853881836, "loss": 0.6163, "nll_loss": 0.581757128238678, "rewards/accuracies": 0.875, "rewards/chosen": -0.18141621351242065, "rewards/margins": 0.3002478778362274, "rewards/rejected": -0.48166409134864807, "step": 4858 }, { "epoch": 13.30321697467488, "grad_norm": 3.432617425918579, "learning_rate": 3.343835616438356e-07, "log_odds_chosen": 2.3712339401245117, "log_odds_ratio": -0.17232461273670197, "logits/chosen": 1.3918684720993042, "logits/rejected": 1.43682861328125, "logps/chosen": -2.050380229949951, "logps/rejected": -4.164634704589844, "loss": 0.5215, "nll_loss": 0.5042527914047241, "rewards/accuracies": 1.0, "rewards/chosen": -0.20503802597522736, "rewards/margins": 0.21142545342445374, "rewards/rejected": -0.4164634644985199, "step": 4859 }, { "epoch": 13.305954825462011, "grad_norm": 3.720667600631714, "learning_rate": 3.342465753424658e-07, "log_odds_chosen": 2.4195938110351562, "log_odds_ratio": -0.23497065901756287, "logits/chosen": 0.9907066822052002, "logits/rejected": 0.8570104837417603, "logps/chosen": -1.903043508529663, "logps/rejected": -4.146137237548828, "loss": 0.5939, "nll_loss": 0.5704017877578735, "rewards/accuracies": 1.0, "rewards/chosen": -0.19030433893203735, "rewards/margins": 0.22430938482284546, "rewards/rejected": -0.4146137237548828, "step": 4860 }, { "epoch": 13.308692676249144, "grad_norm": 3.389493942260742, "learning_rate": 3.341095890410959e-07, "log_odds_chosen": 2.2065858840942383, "log_odds_ratio": -0.17632561922073364, "logits/chosen": 1.3992865085601807, "logits/rejected": 1.398358702659607, "logps/chosen": -2.1531317234039307, "logps/rejected": -4.228135108947754, "loss": 0.5558, "nll_loss": 0.5382167100906372, "rewards/accuracies": 1.0, "rewards/chosen": -0.2153131663799286, "rewards/margins": 0.2075003981590271, "rewards/rejected": -0.4228135645389557, "step": 4861 }, { "epoch": 13.311430527036277, "grad_norm": 4.015266418457031, "learning_rate": 3.33972602739726e-07, "log_odds_chosen": 4.01938533782959, "log_odds_ratio": -0.13730385899543762, "logits/chosen": 1.087062120437622, "logits/rejected": 1.1246442794799805, "logps/chosen": -1.9192595481872559, "logps/rejected": -5.662142276763916, "loss": 0.583, "nll_loss": 0.5692367553710938, "rewards/accuracies": 1.0, "rewards/chosen": -0.19192597270011902, "rewards/margins": 0.37428826093673706, "rewards/rejected": -0.5662142038345337, "step": 4862 }, { "epoch": 13.314168377823409, "grad_norm": 6.852743148803711, "learning_rate": 3.338356164383562e-07, "log_odds_chosen": 4.81532096862793, "log_odds_ratio": -0.2626839280128479, "logits/chosen": 1.1745421886444092, "logits/rejected": 1.197528600692749, "logps/chosen": -2.1822221279144287, "logps/rejected": -6.865076065063477, "loss": 0.6656, "nll_loss": 0.6393027305603027, "rewards/accuracies": 0.875, "rewards/chosen": -0.2182222306728363, "rewards/margins": 0.4682854115962982, "rewards/rejected": -0.6865076422691345, "step": 4863 }, { "epoch": 13.316906228610542, "grad_norm": 5.88792085647583, "learning_rate": 3.336986301369863e-07, "log_odds_chosen": 3.658689498901367, "log_odds_ratio": -0.2058437168598175, "logits/chosen": 1.0987663269042969, "logits/rejected": 1.048698902130127, "logps/chosen": -1.9683644771575928, "logps/rejected": -5.446346759796143, "loss": 0.7012, "nll_loss": 0.6805866360664368, "rewards/accuracies": 1.0, "rewards/chosen": -0.1968364417552948, "rewards/margins": 0.3477981984615326, "rewards/rejected": -0.5446346402168274, "step": 4864 }, { "epoch": 13.319644079397673, "grad_norm": 5.6261887550354, "learning_rate": 3.3356164383561643e-07, "log_odds_chosen": 1.8286601305007935, "log_odds_ratio": -0.4194687604904175, "logits/chosen": 1.053139567375183, "logits/rejected": 1.0207204818725586, "logps/chosen": -2.7516379356384277, "logps/rejected": -4.476644992828369, "loss": 0.7398, "nll_loss": 0.6978470087051392, "rewards/accuracies": 0.875, "rewards/chosen": -0.27516379952430725, "rewards/margins": 0.17250066995620728, "rewards/rejected": -0.4476644694805145, "step": 4865 }, { "epoch": 13.322381930184806, "grad_norm": 5.987917423248291, "learning_rate": 3.3342465753424653e-07, "log_odds_chosen": 1.5243117809295654, "log_odds_ratio": -0.338279664516449, "logits/chosen": 1.0923717021942139, "logits/rejected": 1.0972996950149536, "logps/chosen": -2.0723445415496826, "logps/rejected": -3.471489906311035, "loss": 0.5783, "nll_loss": 0.5444568991661072, "rewards/accuracies": 0.75, "rewards/chosen": -0.2072344869375229, "rewards/margins": 0.13991449773311615, "rewards/rejected": -0.34714895486831665, "step": 4866 }, { "epoch": 13.325119780971937, "grad_norm": 4.672399044036865, "learning_rate": 3.332876712328767e-07, "log_odds_chosen": 1.2057993412017822, "log_odds_ratio": -0.3751661777496338, "logits/chosen": 0.9666973948478699, "logits/rejected": 1.0106728076934814, "logps/chosen": -2.8236751556396484, "logps/rejected": -3.95524263381958, "loss": 0.6026, "nll_loss": 0.5651044249534607, "rewards/accuracies": 0.875, "rewards/chosen": -0.2823675274848938, "rewards/margins": 0.11315670609474182, "rewards/rejected": -0.3955242335796356, "step": 4867 }, { "epoch": 13.32785763175907, "grad_norm": 3.0924410820007324, "learning_rate": 3.3315068493150684e-07, "log_odds_chosen": 3.138855218887329, "log_odds_ratio": -0.18251849710941315, "logits/chosen": 0.8888869285583496, "logits/rejected": 0.8651236891746521, "logps/chosen": -1.2533252239227295, "logps/rejected": -4.075351715087891, "loss": 0.4969, "nll_loss": 0.4786272644996643, "rewards/accuracies": 1.0, "rewards/chosen": -0.1253325343132019, "rewards/margins": 0.28220266103744507, "rewards/rejected": -0.407535195350647, "step": 4868 }, { "epoch": 13.330595482546201, "grad_norm": 3.4799747467041016, "learning_rate": 3.3301369863013694e-07, "log_odds_chosen": 2.820146083831787, "log_odds_ratio": -0.1602782905101776, "logits/chosen": 0.8604219555854797, "logits/rejected": 0.8168838620185852, "logps/chosen": -1.3710881471633911, "logps/rejected": -3.908627986907959, "loss": 0.4561, "nll_loss": 0.44012075662612915, "rewards/accuracies": 1.0, "rewards/chosen": -0.13710881769657135, "rewards/margins": 0.2537539601325989, "rewards/rejected": -0.3908627927303314, "step": 4869 }, { "epoch": 13.333333333333334, "grad_norm": 5.53310489654541, "learning_rate": 3.3287671232876714e-07, "log_odds_chosen": 2.776120662689209, "log_odds_ratio": -0.2771579325199127, "logits/chosen": 1.0137916803359985, "logits/rejected": 0.9969198703765869, "logps/chosen": -2.2692253589630127, "logps/rejected": -4.95265007019043, "loss": 0.5508, "nll_loss": 0.5230469107627869, "rewards/accuracies": 1.0, "rewards/chosen": -0.22692254185676575, "rewards/margins": 0.26834243535995483, "rewards/rejected": -0.49526500701904297, "step": 4870 }, { "epoch": 13.336071184120465, "grad_norm": 3.3546955585479736, "learning_rate": 3.3273972602739724e-07, "log_odds_chosen": 4.179664134979248, "log_odds_ratio": -0.08335800468921661, "logits/chosen": 1.3027756214141846, "logits/rejected": 1.235332727432251, "logps/chosen": -1.8616693019866943, "logps/rejected": -5.808712959289551, "loss": 0.4968, "nll_loss": 0.48849067091941833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1861669421195984, "rewards/margins": 0.3947043716907501, "rewards/rejected": -0.5808712840080261, "step": 4871 }, { "epoch": 13.338809034907598, "grad_norm": 4.1101155281066895, "learning_rate": 3.326027397260274e-07, "log_odds_chosen": 1.413874864578247, "log_odds_ratio": -0.30698010325431824, "logits/chosen": 0.9733560085296631, "logits/rejected": 0.9163438081741333, "logps/chosen": -1.7226282358169556, "logps/rejected": -2.977344036102295, "loss": 0.478, "nll_loss": 0.4473118185997009, "rewards/accuracies": 0.875, "rewards/chosen": -0.17226283252239227, "rewards/margins": 0.1254715919494629, "rewards/rejected": -0.29773443937301636, "step": 4872 }, { "epoch": 13.34154688569473, "grad_norm": 3.3706214427948, "learning_rate": 3.3246575342465754e-07, "log_odds_chosen": 2.285766363143921, "log_odds_ratio": -0.1371506303548813, "logits/chosen": 1.13214910030365, "logits/rejected": 1.132283091545105, "logps/chosen": -1.8952713012695312, "logps/rejected": -4.005034923553467, "loss": 0.5322, "nll_loss": 0.5185193419456482, "rewards/accuracies": 1.0, "rewards/chosen": -0.18952712416648865, "rewards/margins": 0.21097637712955475, "rewards/rejected": -0.4005034863948822, "step": 4873 }, { "epoch": 13.344284736481862, "grad_norm": 3.832911968231201, "learning_rate": 3.3232876712328764e-07, "log_odds_chosen": 3.1187846660614014, "log_odds_ratio": -0.19997988641262054, "logits/chosen": 0.8293083906173706, "logits/rejected": 0.8388216495513916, "logps/chosen": -1.1884515285491943, "logps/rejected": -3.9827704429626465, "loss": 0.4655, "nll_loss": 0.4455026090145111, "rewards/accuracies": 1.0, "rewards/chosen": -0.11884516477584839, "rewards/margins": 0.27943187952041626, "rewards/rejected": -0.39827704429626465, "step": 4874 }, { "epoch": 13.347022587268993, "grad_norm": 3.17268705368042, "learning_rate": 3.321917808219178e-07, "log_odds_chosen": 2.7456014156341553, "log_odds_ratio": -0.19358611106872559, "logits/chosen": 1.1951619386672974, "logits/rejected": 1.219088077545166, "logps/chosen": -1.7369167804718018, "logps/rejected": -4.231509208679199, "loss": 0.4907, "nll_loss": 0.47131845355033875, "rewards/accuracies": 0.875, "rewards/chosen": -0.17369168996810913, "rewards/margins": 0.24945920705795288, "rewards/rejected": -0.423150897026062, "step": 4875 }, { "epoch": 13.349760438056126, "grad_norm": 7.752294063568115, "learning_rate": 3.320547945205479e-07, "log_odds_chosen": 1.6099612712860107, "log_odds_ratio": -0.4022902250289917, "logits/chosen": 1.2572821378707886, "logits/rejected": 1.213333010673523, "logps/chosen": -2.9092307090759277, "logps/rejected": -4.405775547027588, "loss": 0.6276, "nll_loss": 0.5873554944992065, "rewards/accuracies": 0.875, "rewards/chosen": -0.2909231185913086, "rewards/margins": 0.14965447783470154, "rewards/rejected": -0.44057756662368774, "step": 4876 }, { "epoch": 13.352498288843258, "grad_norm": 4.887049674987793, "learning_rate": 3.319178082191781e-07, "log_odds_chosen": 2.8029837608337402, "log_odds_ratio": -0.1450221985578537, "logits/chosen": 1.1062724590301514, "logits/rejected": 1.0457221269607544, "logps/chosen": -2.145185708999634, "logps/rejected": -4.766260623931885, "loss": 0.6696, "nll_loss": 0.6551046371459961, "rewards/accuracies": 1.0, "rewards/chosen": -0.21451857686042786, "rewards/margins": 0.2621074914932251, "rewards/rejected": -0.47662606835365295, "step": 4877 }, { "epoch": 13.35523613963039, "grad_norm": 3.4607765674591064, "learning_rate": 3.317808219178082e-07, "log_odds_chosen": 3.000779390335083, "log_odds_ratio": -0.11346986889839172, "logits/chosen": 0.9189388751983643, "logits/rejected": 0.9206511974334717, "logps/chosen": -1.6478071212768555, "logps/rejected": -4.345433235168457, "loss": 0.4968, "nll_loss": 0.4854919910430908, "rewards/accuracies": 1.0, "rewards/chosen": -0.16478070616722107, "rewards/margins": 0.26976263523101807, "rewards/rejected": -0.43454334139823914, "step": 4878 }, { "epoch": 13.357973990417522, "grad_norm": 3.2655110359191895, "learning_rate": 3.3164383561643835e-07, "log_odds_chosen": 3.561483383178711, "log_odds_ratio": -0.28235194087028503, "logits/chosen": 0.6332135200500488, "logits/rejected": 0.7410190105438232, "logps/chosen": -1.9201903343200684, "logps/rejected": -5.314240455627441, "loss": 0.5479, "nll_loss": 0.5196788311004639, "rewards/accuracies": 0.875, "rewards/chosen": -0.1920190304517746, "rewards/margins": 0.33940500020980835, "rewards/rejected": -0.5314240455627441, "step": 4879 }, { "epoch": 13.360711841204655, "grad_norm": 8.104216575622559, "learning_rate": 3.315068493150685e-07, "log_odds_chosen": 2.2944297790527344, "log_odds_ratio": -0.21445327997207642, "logits/chosen": 1.3433711528778076, "logits/rejected": 1.292741060256958, "logps/chosen": -2.2790896892547607, "logps/rejected": -4.4509596824646, "loss": 0.644, "nll_loss": 0.6225674152374268, "rewards/accuracies": 1.0, "rewards/chosen": -0.22790899872779846, "rewards/margins": 0.21718698740005493, "rewards/rejected": -0.4450959265232086, "step": 4880 }, { "epoch": 13.363449691991786, "grad_norm": 3.8826980590820312, "learning_rate": 3.313698630136986e-07, "log_odds_chosen": 1.914674997329712, "log_odds_ratio": -0.4300489127635956, "logits/chosen": 0.8264513611793518, "logits/rejected": 0.8473142385482788, "logps/chosen": -2.556204319000244, "logps/rejected": -4.348879814147949, "loss": 0.7197, "nll_loss": 0.6767061948776245, "rewards/accuracies": 0.875, "rewards/chosen": -0.25562041997909546, "rewards/margins": 0.1792675405740738, "rewards/rejected": -0.43488797545433044, "step": 4881 }, { "epoch": 13.366187542778919, "grad_norm": 5.4686665534973145, "learning_rate": 3.3123287671232875e-07, "log_odds_chosen": 3.062375545501709, "log_odds_ratio": -0.19828204810619354, "logits/chosen": 0.9260411858558655, "logits/rejected": 0.8619321584701538, "logps/chosen": -2.269416093826294, "logps/rejected": -5.151383399963379, "loss": 0.6624, "nll_loss": 0.6425927877426147, "rewards/accuracies": 1.0, "rewards/chosen": -0.22694161534309387, "rewards/margins": 0.28819671273231506, "rewards/rejected": -0.5151383280754089, "step": 4882 }, { "epoch": 13.36892539356605, "grad_norm": 4.068176746368408, "learning_rate": 3.3109589041095885e-07, "log_odds_chosen": 1.9432556629180908, "log_odds_ratio": -0.23254923522472382, "logits/chosen": 1.0481727123260498, "logits/rejected": 0.9568948745727539, "logps/chosen": -2.2666475772857666, "logps/rejected": -4.067408084869385, "loss": 0.5177, "nll_loss": 0.49448463320732117, "rewards/accuracies": 0.875, "rewards/chosen": -0.22666475176811218, "rewards/margins": 0.18007606267929077, "rewards/rejected": -0.40674081444740295, "step": 4883 }, { "epoch": 13.371663244353183, "grad_norm": 4.906710147857666, "learning_rate": 3.3095890410958906e-07, "log_odds_chosen": 3.107607126235962, "log_odds_ratio": -0.20174771547317505, "logits/chosen": 0.8269054889678955, "logits/rejected": 0.7710939049720764, "logps/chosen": -2.0694096088409424, "logps/rejected": -5.02341365814209, "loss": 0.6737, "nll_loss": 0.6535420417785645, "rewards/accuracies": 1.0, "rewards/chosen": -0.20694094896316528, "rewards/margins": 0.29540038108825684, "rewards/rejected": -0.5023413300514221, "step": 4884 }, { "epoch": 13.374401095140314, "grad_norm": 3.9120986461639404, "learning_rate": 3.3082191780821916e-07, "log_odds_chosen": 2.220353364944458, "log_odds_ratio": -0.21462371945381165, "logits/chosen": 1.1592460870742798, "logits/rejected": 1.108392596244812, "logps/chosen": -1.830592393875122, "logps/rejected": -3.9169273376464844, "loss": 0.5971, "nll_loss": 0.575641393661499, "rewards/accuracies": 1.0, "rewards/chosen": -0.1830592304468155, "rewards/margins": 0.20863351225852966, "rewards/rejected": -0.39169275760650635, "step": 4885 }, { "epoch": 13.377138945927447, "grad_norm": 3.7885382175445557, "learning_rate": 3.306849315068493e-07, "log_odds_chosen": 1.9237476587295532, "log_odds_ratio": -0.2447885274887085, "logits/chosen": 0.9460709095001221, "logits/rejected": 0.9240493774414062, "logps/chosen": -2.1069235801696777, "logps/rejected": -3.9383087158203125, "loss": 0.5851, "nll_loss": 0.5605852603912354, "rewards/accuracies": 0.875, "rewards/chosen": -0.21069234609603882, "rewards/margins": 0.18313853442668915, "rewards/rejected": -0.39383089542388916, "step": 4886 }, { "epoch": 13.37987679671458, "grad_norm": 3.7524924278259277, "learning_rate": 3.3054794520547946e-07, "log_odds_chosen": 4.246766090393066, "log_odds_ratio": -0.08934737741947174, "logits/chosen": 1.0994970798492432, "logits/rejected": 1.0809334516525269, "logps/chosen": -2.525526285171509, "logps/rejected": -6.514671325683594, "loss": 0.5314, "nll_loss": 0.5224853754043579, "rewards/accuracies": 1.0, "rewards/chosen": -0.2525526285171509, "rewards/margins": 0.39891454577445984, "rewards/rejected": -0.6514671444892883, "step": 4887 }, { "epoch": 13.382614647501711, "grad_norm": 5.302473068237305, "learning_rate": 3.3041095890410956e-07, "log_odds_chosen": 4.000568389892578, "log_odds_ratio": -0.18082961440086365, "logits/chosen": 1.1805957555770874, "logits/rejected": 1.1889588832855225, "logps/chosen": -2.3344390392303467, "logps/rejected": -6.211124420166016, "loss": 0.6419, "nll_loss": 0.6238294243812561, "rewards/accuracies": 1.0, "rewards/chosen": -0.23344390094280243, "rewards/margins": 0.38766855001449585, "rewards/rejected": -0.6211124658584595, "step": 4888 }, { "epoch": 13.385352498288844, "grad_norm": 3.2428462505340576, "learning_rate": 3.302739726027397e-07, "log_odds_chosen": 3.845808267593384, "log_odds_ratio": -0.08311760425567627, "logits/chosen": 1.1559064388275146, "logits/rejected": 1.2050623893737793, "logps/chosen": -2.4388017654418945, "logps/rejected": -6.093212127685547, "loss": 0.5791, "nll_loss": 0.5707808136940002, "rewards/accuracies": 1.0, "rewards/chosen": -0.24388015270233154, "rewards/margins": 0.36544108390808105, "rewards/rejected": -0.6093212366104126, "step": 4889 }, { "epoch": 13.388090349075975, "grad_norm": 3.9600162506103516, "learning_rate": 3.301369863013698e-07, "log_odds_chosen": 2.451964855194092, "log_odds_ratio": -0.25471240282058716, "logits/chosen": 1.2006486654281616, "logits/rejected": 1.243487000465393, "logps/chosen": -2.1233201026916504, "logps/rejected": -4.454594135284424, "loss": 0.5816, "nll_loss": 0.5561598539352417, "rewards/accuracies": 0.875, "rewards/chosen": -0.21233201026916504, "rewards/margins": 0.2331273853778839, "rewards/rejected": -0.44545939564704895, "step": 4890 }, { "epoch": 13.390828199863108, "grad_norm": 5.928768634796143, "learning_rate": 3.3e-07, "log_odds_chosen": 2.6430325508117676, "log_odds_ratio": -0.3267139792442322, "logits/chosen": 0.9922935962677002, "logits/rejected": 0.9779403209686279, "logps/chosen": -2.4739270210266113, "logps/rejected": -4.991347789764404, "loss": 0.5871, "nll_loss": 0.5544321537017822, "rewards/accuracies": 0.875, "rewards/chosen": -0.2473927140235901, "rewards/margins": 0.25174206495285034, "rewards/rejected": -0.49913477897644043, "step": 4891 }, { "epoch": 13.39356605065024, "grad_norm": 3.58501935005188, "learning_rate": 3.298630136986301e-07, "log_odds_chosen": 3.464665412902832, "log_odds_ratio": -0.2307455986738205, "logits/chosen": 0.9057145118713379, "logits/rejected": 0.9113304615020752, "logps/chosen": -1.939448595046997, "logps/rejected": -5.221298694610596, "loss": 0.6515, "nll_loss": 0.6283788681030273, "rewards/accuracies": 1.0, "rewards/chosen": -0.19394487142562866, "rewards/margins": 0.32818499207496643, "rewards/rejected": -0.5221298933029175, "step": 4892 }, { "epoch": 13.396303901437372, "grad_norm": 3.9104790687561035, "learning_rate": 3.2972602739726027e-07, "log_odds_chosen": 3.936563014984131, "log_odds_ratio": -0.0698670819401741, "logits/chosen": 1.0865134000778198, "logits/rejected": 1.1809993982315063, "logps/chosen": -1.814855694770813, "logps/rejected": -5.403392314910889, "loss": 0.6796, "nll_loss": 0.6726261973381042, "rewards/accuracies": 1.0, "rewards/chosen": -0.18148556351661682, "rewards/margins": 0.35885369777679443, "rewards/rejected": -0.5403392314910889, "step": 4893 }, { "epoch": 13.399041752224504, "grad_norm": 6.394491672515869, "learning_rate": 3.295890410958904e-07, "log_odds_chosen": 1.313309907913208, "log_odds_ratio": -0.4211727976799011, "logits/chosen": 0.9689269065856934, "logits/rejected": 0.966349720954895, "logps/chosen": -3.259572982788086, "logps/rejected": -4.486428260803223, "loss": 0.7016, "nll_loss": 0.6595211029052734, "rewards/accuracies": 0.75, "rewards/chosen": -0.325957328081131, "rewards/margins": 0.122685506939888, "rewards/rejected": -0.4486427903175354, "step": 4894 }, { "epoch": 13.401779603011637, "grad_norm": 6.400148391723633, "learning_rate": 3.294520547945205e-07, "log_odds_chosen": 2.4016122817993164, "log_odds_ratio": -0.30092984437942505, "logits/chosen": 0.8490727543830872, "logits/rejected": 0.7953795790672302, "logps/chosen": -1.6743292808532715, "logps/rejected": -3.8475852012634277, "loss": 0.5393, "nll_loss": 0.5091840028762817, "rewards/accuracies": 0.875, "rewards/chosen": -0.16743293404579163, "rewards/margins": 0.2173255831003189, "rewards/rejected": -0.38475853204727173, "step": 4895 }, { "epoch": 13.404517453798768, "grad_norm": 3.4086902141571045, "learning_rate": 3.2931506849315067e-07, "log_odds_chosen": 2.6160407066345215, "log_odds_ratio": -0.21127201616764069, "logits/chosen": 1.1618788242340088, "logits/rejected": 1.1165521144866943, "logps/chosen": -1.2529507875442505, "logps/rejected": -3.571171998977661, "loss": 0.479, "nll_loss": 0.45788902044296265, "rewards/accuracies": 1.0, "rewards/chosen": -0.12529508769512177, "rewards/margins": 0.23182210326194763, "rewards/rejected": -0.3571172058582306, "step": 4896 }, { "epoch": 13.4072553045859, "grad_norm": 6.515876770019531, "learning_rate": 3.2917808219178077e-07, "log_odds_chosen": 2.588289737701416, "log_odds_ratio": -0.3434193730354309, "logits/chosen": 1.3946095705032349, "logits/rejected": 1.4031779766082764, "logps/chosen": -1.718583106994629, "logps/rejected": -4.110918998718262, "loss": 0.5566, "nll_loss": 0.5222441554069519, "rewards/accuracies": 0.875, "rewards/chosen": -0.1718583106994629, "rewards/margins": 0.2392335832118988, "rewards/rejected": -0.4110919237136841, "step": 4897 }, { "epoch": 13.409993155373032, "grad_norm": 3.8767852783203125, "learning_rate": 3.29041095890411e-07, "log_odds_chosen": 6.4963579177856445, "log_odds_ratio": -0.07302030175924301, "logits/chosen": 1.0916613340377808, "logits/rejected": 1.132414698600769, "logps/chosen": -2.0015101432800293, "logps/rejected": -8.33000373840332, "loss": 0.5535, "nll_loss": 0.5461596250534058, "rewards/accuracies": 1.0, "rewards/chosen": -0.20015102624893188, "rewards/margins": 0.6328493356704712, "rewards/rejected": -0.8330003619194031, "step": 4898 }, { "epoch": 13.412731006160165, "grad_norm": 8.722953796386719, "learning_rate": 3.289041095890411e-07, "log_odds_chosen": 1.280236840248108, "log_odds_ratio": -0.6050172448158264, "logits/chosen": 0.9576776027679443, "logits/rejected": 0.8822197914123535, "logps/chosen": -2.3148062229156494, "logps/rejected": -3.446408748626709, "loss": 0.6116, "nll_loss": 0.5510619878768921, "rewards/accuracies": 0.75, "rewards/chosen": -0.23148062825202942, "rewards/margins": 0.11316023766994476, "rewards/rejected": -0.344640851020813, "step": 4899 }, { "epoch": 13.415468856947296, "grad_norm": 3.5006067752838135, "learning_rate": 3.287671232876712e-07, "log_odds_chosen": 4.444171905517578, "log_odds_ratio": -0.10147823393344879, "logits/chosen": 1.1761720180511475, "logits/rejected": 1.1875691413879395, "logps/chosen": -1.7139606475830078, "logps/rejected": -5.892971992492676, "loss": 0.5469, "nll_loss": 0.5367534756660461, "rewards/accuracies": 1.0, "rewards/chosen": -0.17139607667922974, "rewards/margins": 0.4179011285305023, "rewards/rejected": -0.5892971754074097, "step": 4900 }, { "epoch": 13.418206707734429, "grad_norm": 3.3673863410949707, "learning_rate": 3.286301369863014e-07, "log_odds_chosen": 3.3988592624664307, "log_odds_ratio": -0.12972769141197205, "logits/chosen": 1.0025075674057007, "logits/rejected": 0.9401929378509521, "logps/chosen": -1.1241790056228638, "logps/rejected": -4.108551502227783, "loss": 0.455, "nll_loss": 0.44204387068748474, "rewards/accuracies": 1.0, "rewards/chosen": -0.11241790652275085, "rewards/margins": 0.298437237739563, "rewards/rejected": -0.41085517406463623, "step": 4901 }, { "epoch": 13.42094455852156, "grad_norm": 5.990121841430664, "learning_rate": 3.284931506849315e-07, "log_odds_chosen": 2.8603124618530273, "log_odds_ratio": -0.5623029470443726, "logits/chosen": 1.0142335891723633, "logits/rejected": 0.9963372945785522, "logps/chosen": -1.7503697872161865, "logps/rejected": -4.561071872711182, "loss": 0.6185, "nll_loss": 0.5622925162315369, "rewards/accuracies": 0.75, "rewards/chosen": -0.1750369668006897, "rewards/margins": 0.2810702323913574, "rewards/rejected": -0.4561072587966919, "step": 4902 }, { "epoch": 13.423682409308693, "grad_norm": 3.389286756515503, "learning_rate": 3.2835616438356163e-07, "log_odds_chosen": 3.492971420288086, "log_odds_ratio": -0.18928688764572144, "logits/chosen": 0.8948471546173096, "logits/rejected": 0.8652392625808716, "logps/chosen": -2.3076159954071045, "logps/rejected": -5.664889335632324, "loss": 0.6018, "nll_loss": 0.5828599333763123, "rewards/accuracies": 1.0, "rewards/chosen": -0.2307616025209427, "rewards/margins": 0.335727334022522, "rewards/rejected": -0.5664889812469482, "step": 4903 }, { "epoch": 13.426420260095824, "grad_norm": 5.661614418029785, "learning_rate": 3.2821917808219173e-07, "log_odds_chosen": 4.511737823486328, "log_odds_ratio": -0.07102738320827484, "logits/chosen": 0.9716001749038696, "logits/rejected": 0.9368875622749329, "logps/chosen": -2.4248404502868652, "logps/rejected": -6.7383527755737305, "loss": 0.6184, "nll_loss": 0.6112731695175171, "rewards/accuracies": 1.0, "rewards/chosen": -0.24248403310775757, "rewards/margins": 0.43135130405426025, "rewards/rejected": -0.6738353371620178, "step": 4904 }, { "epoch": 13.429158110882957, "grad_norm": 3.655640125274658, "learning_rate": 3.2808219178082193e-07, "log_odds_chosen": 2.9496359825134277, "log_odds_ratio": -0.21357619762420654, "logits/chosen": 1.5096931457519531, "logits/rejected": 1.4839956760406494, "logps/chosen": -1.6856005191802979, "logps/rejected": -4.436333656311035, "loss": 0.527, "nll_loss": 0.5056125521659851, "rewards/accuracies": 1.0, "rewards/chosen": -0.16856005787849426, "rewards/margins": 0.2750733494758606, "rewards/rejected": -0.44363337755203247, "step": 4905 }, { "epoch": 13.431895961670088, "grad_norm": 3.8529162406921387, "learning_rate": 3.2794520547945203e-07, "log_odds_chosen": 2.0283656120300293, "log_odds_ratio": -0.26908552646636963, "logits/chosen": 0.9505388140678406, "logits/rejected": 0.9425041675567627, "logps/chosen": -1.4208050966262817, "logps/rejected": -3.1264219284057617, "loss": 0.4367, "nll_loss": 0.40978914499282837, "rewards/accuracies": 1.0, "rewards/chosen": -0.14208051562309265, "rewards/margins": 0.17056167125701904, "rewards/rejected": -0.3126421868801117, "step": 4906 }, { "epoch": 13.434633812457221, "grad_norm": 3.812096357345581, "learning_rate": 3.2780821917808213e-07, "log_odds_chosen": 2.1615536212921143, "log_odds_ratio": -0.22217673063278198, "logits/chosen": 1.1218373775482178, "logits/rejected": 1.0597602128982544, "logps/chosen": -1.611081838607788, "logps/rejected": -3.5939130783081055, "loss": 0.5032, "nll_loss": 0.4810221195220947, "rewards/accuracies": 1.0, "rewards/chosen": -0.16110816597938538, "rewards/margins": 0.1982831209897995, "rewards/rejected": -0.3593912720680237, "step": 4907 }, { "epoch": 13.437371663244353, "grad_norm": 3.965414047241211, "learning_rate": 3.2767123287671234e-07, "log_odds_chosen": 1.79996657371521, "log_odds_ratio": -0.2373296469449997, "logits/chosen": 1.059477686882019, "logits/rejected": 1.1038663387298584, "logps/chosen": -2.238839626312256, "logps/rejected": -3.9185266494750977, "loss": 0.5885, "nll_loss": 0.5647611021995544, "rewards/accuracies": 0.875, "rewards/chosen": -0.2238840013742447, "rewards/margins": 0.16796867549419403, "rewards/rejected": -0.3918526768684387, "step": 4908 }, { "epoch": 13.440109514031485, "grad_norm": 4.102447032928467, "learning_rate": 3.2753424657534244e-07, "log_odds_chosen": 2.1259047985076904, "log_odds_ratio": -0.19697923958301544, "logits/chosen": 1.1556789875030518, "logits/rejected": 1.1433964967727661, "logps/chosen": -1.6943166255950928, "logps/rejected": -3.584036350250244, "loss": 0.5485, "nll_loss": 0.5288165807723999, "rewards/accuracies": 1.0, "rewards/chosen": -0.1694316864013672, "rewards/margins": 0.18897196650505066, "rewards/rejected": -0.35840362310409546, "step": 4909 }, { "epoch": 13.442847364818617, "grad_norm": 3.4657673835754395, "learning_rate": 3.273972602739726e-07, "log_odds_chosen": 2.803687334060669, "log_odds_ratio": -0.16432461142539978, "logits/chosen": 0.8441625833511353, "logits/rejected": 0.8125418424606323, "logps/chosen": -1.3264869451522827, "logps/rejected": -3.8554632663726807, "loss": 0.421, "nll_loss": 0.40459567308425903, "rewards/accuracies": 1.0, "rewards/chosen": -0.13264869153499603, "rewards/margins": 0.25289762020111084, "rewards/rejected": -0.3855462968349457, "step": 4910 }, { "epoch": 13.44558521560575, "grad_norm": 3.5561306476593018, "learning_rate": 3.2726027397260274e-07, "log_odds_chosen": 1.8772292137145996, "log_odds_ratio": -0.2188303917646408, "logits/chosen": 1.2429710626602173, "logits/rejected": 1.1732524633407593, "logps/chosen": -1.6869972944259644, "logps/rejected": -3.358306407928467, "loss": 0.4631, "nll_loss": 0.4412267208099365, "rewards/accuracies": 1.0, "rewards/chosen": -0.1686997413635254, "rewards/margins": 0.16713091731071472, "rewards/rejected": -0.3358306288719177, "step": 4911 }, { "epoch": 13.44832306639288, "grad_norm": 3.87324595451355, "learning_rate": 3.271232876712329e-07, "log_odds_chosen": 2.1144936084747314, "log_odds_ratio": -0.23340439796447754, "logits/chosen": 1.004242181777954, "logits/rejected": 1.0072872638702393, "logps/chosen": -1.7387572526931763, "logps/rejected": -3.6090056896209717, "loss": 0.4428, "nll_loss": 0.4194797873497009, "rewards/accuracies": 1.0, "rewards/chosen": -0.17387571930885315, "rewards/margins": 0.18702484667301178, "rewards/rejected": -0.3609005808830261, "step": 4912 }, { "epoch": 13.451060917180014, "grad_norm": 3.4721486568450928, "learning_rate": 3.26986301369863e-07, "log_odds_chosen": 4.079130172729492, "log_odds_ratio": -0.07974202185869217, "logits/chosen": 0.9479578137397766, "logits/rejected": 0.933784008026123, "logps/chosen": -1.4924190044403076, "logps/rejected": -5.275125026702881, "loss": 0.42, "nll_loss": 0.4119773507118225, "rewards/accuracies": 1.0, "rewards/chosen": -0.14924190938472748, "rewards/margins": 0.37827062606811523, "rewards/rejected": -0.5275125503540039, "step": 4913 }, { "epoch": 13.453798767967147, "grad_norm": 4.27956485748291, "learning_rate": 3.268493150684931e-07, "log_odds_chosen": 3.594212532043457, "log_odds_ratio": -0.18104121088981628, "logits/chosen": 0.9898066520690918, "logits/rejected": 0.9354260563850403, "logps/chosen": -1.5720504522323608, "logps/rejected": -4.901425361633301, "loss": 0.6628, "nll_loss": 0.6446546316146851, "rewards/accuracies": 1.0, "rewards/chosen": -0.15720504522323608, "rewards/margins": 0.3329375088214874, "rewards/rejected": -0.4901425242424011, "step": 4914 }, { "epoch": 13.456536618754278, "grad_norm": 5.882359981536865, "learning_rate": 3.267123287671233e-07, "log_odds_chosen": 0.7549060583114624, "log_odds_ratio": -0.5295515656471252, "logits/chosen": 1.1876715421676636, "logits/rejected": 1.266257405281067, "logps/chosen": -2.748751163482666, "logps/rejected": -3.398463726043701, "loss": 0.6274, "nll_loss": 0.5744766592979431, "rewards/accuracies": 0.875, "rewards/chosen": -0.27487510442733765, "rewards/margins": 0.06497126817703247, "rewards/rejected": -0.3398463726043701, "step": 4915 }, { "epoch": 13.45927446954141, "grad_norm": 3.2934162616729736, "learning_rate": 3.265753424657534e-07, "log_odds_chosen": 2.1431403160095215, "log_odds_ratio": -0.14865843951702118, "logits/chosen": 1.114288330078125, "logits/rejected": 1.0743777751922607, "logps/chosen": -1.700386643409729, "logps/rejected": -3.6483237743377686, "loss": 0.4653, "nll_loss": 0.45038917660713196, "rewards/accuracies": 1.0, "rewards/chosen": -0.17003867030143738, "rewards/margins": 0.1947937309741974, "rewards/rejected": -0.36483240127563477, "step": 4916 }, { "epoch": 13.462012320328542, "grad_norm": 3.454946279525757, "learning_rate": 3.2643835616438355e-07, "log_odds_chosen": 4.951180458068848, "log_odds_ratio": -0.077230304479599, "logits/chosen": 1.0755224227905273, "logits/rejected": 1.1133995056152344, "logps/chosen": -1.6527016162872314, "logps/rejected": -6.3423638343811035, "loss": 0.5213, "nll_loss": 0.5135952234268188, "rewards/accuracies": 1.0, "rewards/chosen": -0.16527016460895538, "rewards/margins": 0.4689662754535675, "rewards/rejected": -0.6342363953590393, "step": 4917 }, { "epoch": 13.464750171115675, "grad_norm": 3.4301085472106934, "learning_rate": 3.263013698630137e-07, "log_odds_chosen": 2.2223262786865234, "log_odds_ratio": -0.2654043734073639, "logits/chosen": 1.200110912322998, "logits/rejected": 1.1694780588150024, "logps/chosen": -2.2274248600006104, "logps/rejected": -4.284947872161865, "loss": 0.5584, "nll_loss": 0.5319058895111084, "rewards/accuracies": 0.875, "rewards/chosen": -0.22274249792099, "rewards/margins": 0.20575234293937683, "rewards/rejected": -0.42849481105804443, "step": 4918 }, { "epoch": 13.467488021902806, "grad_norm": 3.445657968521118, "learning_rate": 3.2616438356164385e-07, "log_odds_chosen": 1.9798530340194702, "log_odds_ratio": -0.20500165224075317, "logits/chosen": 1.0751599073410034, "logits/rejected": 1.0640339851379395, "logps/chosen": -1.826955795288086, "logps/rejected": -3.6223626136779785, "loss": 0.5514, "nll_loss": 0.5308628082275391, "rewards/accuracies": 1.0, "rewards/chosen": -0.18269556760787964, "rewards/margins": 0.17954067885875702, "rewards/rejected": -0.36223626136779785, "step": 4919 }, { "epoch": 13.470225872689939, "grad_norm": 3.5589404106140137, "learning_rate": 3.2602739726027395e-07, "log_odds_chosen": 1.186252474784851, "log_odds_ratio": -0.34112781286239624, "logits/chosen": 1.0547337532043457, "logits/rejected": 1.088075041770935, "logps/chosen": -1.5408470630645752, "logps/rejected": -2.567086935043335, "loss": 0.4822, "nll_loss": 0.44807082414627075, "rewards/accuracies": 0.875, "rewards/chosen": -0.154084712266922, "rewards/margins": 0.10262397676706314, "rewards/rejected": -0.25670871138572693, "step": 4920 }, { "epoch": 13.47296372347707, "grad_norm": 3.625675678253174, "learning_rate": 3.2589041095890405e-07, "log_odds_chosen": 4.996610641479492, "log_odds_ratio": -0.0808868482708931, "logits/chosen": 1.2915796041488647, "logits/rejected": 1.3237518072128296, "logps/chosen": -1.5723586082458496, "logps/rejected": -6.346223831176758, "loss": 0.4976, "nll_loss": 0.4894711971282959, "rewards/accuracies": 1.0, "rewards/chosen": -0.15723584592342377, "rewards/margins": 0.4773865342140198, "rewards/rejected": -0.63462233543396, "step": 4921 }, { "epoch": 13.475701574264203, "grad_norm": 3.967787742614746, "learning_rate": 3.2575342465753425e-07, "log_odds_chosen": 2.835090160369873, "log_odds_ratio": -0.12668108940124512, "logits/chosen": 0.6845335960388184, "logits/rejected": 0.5510826706886292, "logps/chosen": -1.7084176540374756, "logps/rejected": -4.369478702545166, "loss": 0.6201, "nll_loss": 0.6074023246765137, "rewards/accuracies": 1.0, "rewards/chosen": -0.1708417683839798, "rewards/margins": 0.26610609889030457, "rewards/rejected": -0.43694788217544556, "step": 4922 }, { "epoch": 13.478439425051334, "grad_norm": 4.434597015380859, "learning_rate": 3.2561643835616435e-07, "log_odds_chosen": 2.8930623531341553, "log_odds_ratio": -0.24726000428199768, "logits/chosen": 1.4577289819717407, "logits/rejected": 1.5330179929733276, "logps/chosen": -2.287978172302246, "logps/rejected": -5.093642234802246, "loss": 0.6162, "nll_loss": 0.5914809703826904, "rewards/accuracies": 0.875, "rewards/chosen": -0.2287978231906891, "rewards/margins": 0.28056639432907104, "rewards/rejected": -0.5093642473220825, "step": 4923 }, { "epoch": 13.481177275838467, "grad_norm": 3.1697919368743896, "learning_rate": 3.254794520547945e-07, "log_odds_chosen": 3.2053041458129883, "log_odds_ratio": -0.16552311182022095, "logits/chosen": 0.90260910987854, "logits/rejected": 0.8628307580947876, "logps/chosen": -1.8174256086349487, "logps/rejected": -4.8650665283203125, "loss": 0.5339, "nll_loss": 0.5173404812812805, "rewards/accuracies": 1.0, "rewards/chosen": -0.1817425787448883, "rewards/margins": 0.3047640919685364, "rewards/rejected": -0.4865066409111023, "step": 4924 }, { "epoch": 13.483915126625599, "grad_norm": 3.3656280040740967, "learning_rate": 3.2534246575342466e-07, "log_odds_chosen": 2.051774501800537, "log_odds_ratio": -0.24109892547130585, "logits/chosen": 1.1706210374832153, "logits/rejected": 1.1747204065322876, "logps/chosen": -1.6910297870635986, "logps/rejected": -3.6007046699523926, "loss": 0.5574, "nll_loss": 0.5332728624343872, "rewards/accuracies": 0.875, "rewards/chosen": -0.1691029816865921, "rewards/margins": 0.19096750020980835, "rewards/rejected": -0.36007046699523926, "step": 4925 }, { "epoch": 13.486652977412732, "grad_norm": 3.8049960136413574, "learning_rate": 3.252054794520548e-07, "log_odds_chosen": 2.5888235569000244, "log_odds_ratio": -0.23327267169952393, "logits/chosen": 0.8891832828521729, "logits/rejected": 0.8808033466339111, "logps/chosen": -1.9621655941009521, "logps/rejected": -4.423440933227539, "loss": 0.6699, "nll_loss": 0.6465703248977661, "rewards/accuracies": 1.0, "rewards/chosen": -0.19621655344963074, "rewards/margins": 0.24612756073474884, "rewards/rejected": -0.44234412908554077, "step": 4926 }, { "epoch": 13.489390828199863, "grad_norm": 4.085444450378418, "learning_rate": 3.250684931506849e-07, "log_odds_chosen": 0.9933563470840454, "log_odds_ratio": -0.4659041166305542, "logits/chosen": 1.1625356674194336, "logits/rejected": 1.0863258838653564, "logps/chosen": -1.9497560262680054, "logps/rejected": -2.7701542377471924, "loss": 0.5716, "nll_loss": 0.5249597430229187, "rewards/accuracies": 0.875, "rewards/chosen": -0.1949755996465683, "rewards/margins": 0.08203982561826706, "rewards/rejected": -0.27701541781425476, "step": 4927 }, { "epoch": 13.492128678986996, "grad_norm": 7.463710784912109, "learning_rate": 3.24931506849315e-07, "log_odds_chosen": 3.2485837936401367, "log_odds_ratio": -0.19666971266269684, "logits/chosen": 1.2080307006835938, "logits/rejected": 1.1805682182312012, "logps/chosen": -2.218219757080078, "logps/rejected": -5.288471221923828, "loss": 0.5829, "nll_loss": 0.5631860494613647, "rewards/accuracies": 1.0, "rewards/chosen": -0.22182199358940125, "rewards/margins": 0.30702510476112366, "rewards/rejected": -0.5288470983505249, "step": 4928 }, { "epoch": 13.494866529774127, "grad_norm": 3.4752416610717773, "learning_rate": 3.247945205479452e-07, "log_odds_chosen": 2.8122169971466064, "log_odds_ratio": -0.17540514469146729, "logits/chosen": 1.1583406925201416, "logits/rejected": 1.1327903270721436, "logps/chosen": -1.396759033203125, "logps/rejected": -3.886996030807495, "loss": 0.4715, "nll_loss": 0.4539318382740021, "rewards/accuracies": 1.0, "rewards/chosen": -0.13967588543891907, "rewards/margins": 0.24902373552322388, "rewards/rejected": -0.38869965076446533, "step": 4929 }, { "epoch": 13.49760438056126, "grad_norm": 7.682637691497803, "learning_rate": 3.246575342465753e-07, "log_odds_chosen": 1.0415000915527344, "log_odds_ratio": -0.8144801259040833, "logits/chosen": 1.1564239263534546, "logits/rejected": 1.2505712509155273, "logps/chosen": -3.295362949371338, "logps/rejected": -4.26484489440918, "loss": 0.6864, "nll_loss": 0.6049026250839233, "rewards/accuracies": 0.875, "rewards/chosen": -0.3295363187789917, "rewards/margins": 0.09694816917181015, "rewards/rejected": -0.42648449540138245, "step": 4930 }, { "epoch": 13.500342231348391, "grad_norm": 4.461737155914307, "learning_rate": 3.2452054794520546e-07, "log_odds_chosen": 3.242363452911377, "log_odds_ratio": -0.29722240567207336, "logits/chosen": 1.0097118616104126, "logits/rejected": 1.0685889720916748, "logps/chosen": -2.5200634002685547, "logps/rejected": -5.679145812988281, "loss": 0.7182, "nll_loss": 0.6884329915046692, "rewards/accuracies": 0.875, "rewards/chosen": -0.2520063519477844, "rewards/margins": 0.315908282995224, "rewards/rejected": -0.5679146647453308, "step": 4931 }, { "epoch": 13.503080082135524, "grad_norm": 2.7909326553344727, "learning_rate": 3.243835616438356e-07, "log_odds_chosen": 3.8551347255706787, "log_odds_ratio": -0.1865781843662262, "logits/chosen": 1.5013126134872437, "logits/rejected": 1.4551411867141724, "logps/chosen": -1.8687968254089355, "logps/rejected": -5.534071922302246, "loss": 0.4523, "nll_loss": 0.4336116909980774, "rewards/accuracies": 1.0, "rewards/chosen": -0.18687967956066132, "rewards/margins": 0.3665274977684021, "rewards/rejected": -0.5534071922302246, "step": 4932 }, { "epoch": 13.505817932922655, "grad_norm": 4.4276838302612305, "learning_rate": 3.2424657534246577e-07, "log_odds_chosen": 1.973401665687561, "log_odds_ratio": -0.19360686838626862, "logits/chosen": 0.8601663112640381, "logits/rejected": 0.7944839000701904, "logps/chosen": -1.9701441526412964, "logps/rejected": -3.6939902305603027, "loss": 0.4747, "nll_loss": 0.4553877115249634, "rewards/accuracies": 1.0, "rewards/chosen": -0.19701440632343292, "rewards/margins": 0.17238463461399078, "rewards/rejected": -0.3693990707397461, "step": 4933 }, { "epoch": 13.508555783709788, "grad_norm": 3.1008963584899902, "learning_rate": 3.2410958904109587e-07, "log_odds_chosen": 3.529816150665283, "log_odds_ratio": -0.1479552686214447, "logits/chosen": 0.9839047789573669, "logits/rejected": 0.9830734729766846, "logps/chosen": -1.6912016868591309, "logps/rejected": -5.038891792297363, "loss": 0.5301, "nll_loss": 0.5153350830078125, "rewards/accuracies": 1.0, "rewards/chosen": -0.1691201627254486, "rewards/margins": 0.33476904034614563, "rewards/rejected": -0.5038892030715942, "step": 4934 }, { "epoch": 13.51129363449692, "grad_norm": 3.4473652839660645, "learning_rate": 3.2397260273972597e-07, "log_odds_chosen": 3.722318410873413, "log_odds_ratio": -0.19207613170146942, "logits/chosen": 1.0020503997802734, "logits/rejected": 0.9965966939926147, "logps/chosen": -2.8274221420288086, "logps/rejected": -6.4471282958984375, "loss": 0.5848, "nll_loss": 0.5656316876411438, "rewards/accuracies": 1.0, "rewards/chosen": -0.2827422022819519, "rewards/margins": 0.3619706630706787, "rewards/rejected": -0.6447128653526306, "step": 4935 }, { "epoch": 13.514031485284052, "grad_norm": 5.364511013031006, "learning_rate": 3.2383561643835617e-07, "log_odds_chosen": 3.1913363933563232, "log_odds_ratio": -0.2057170420885086, "logits/chosen": 1.0309110879898071, "logits/rejected": 1.0182852745056152, "logps/chosen": -2.132627010345459, "logps/rejected": -4.998025894165039, "loss": 0.5467, "nll_loss": 0.5261343121528625, "rewards/accuracies": 0.875, "rewards/chosen": -0.21326270699501038, "rewards/margins": 0.28653988242149353, "rewards/rejected": -0.4998025596141815, "step": 4936 }, { "epoch": 13.516769336071183, "grad_norm": 3.531548500061035, "learning_rate": 3.2369863013698627e-07, "log_odds_chosen": 2.1197824478149414, "log_odds_ratio": -0.15646860003471375, "logits/chosen": 1.1888492107391357, "logits/rejected": 1.1970354318618774, "logps/chosen": -2.677321672439575, "logps/rejected": -4.642130374908447, "loss": 0.62, "nll_loss": 0.6043199300765991, "rewards/accuracies": 1.0, "rewards/chosen": -0.2677321434020996, "rewards/margins": 0.1964809149503708, "rewards/rejected": -0.4642130732536316, "step": 4937 }, { "epoch": 13.519507186858316, "grad_norm": 3.4409773349761963, "learning_rate": 3.235616438356164e-07, "log_odds_chosen": 2.705336093902588, "log_odds_ratio": -0.239397794008255, "logits/chosen": 1.0107418298721313, "logits/rejected": 1.053611159324646, "logps/chosen": -1.7969679832458496, "logps/rejected": -4.329058647155762, "loss": 0.5307, "nll_loss": 0.5067758560180664, "rewards/accuracies": 1.0, "rewards/chosen": -0.17969679832458496, "rewards/margins": 0.25320908427238464, "rewards/rejected": -0.4329058825969696, "step": 4938 }, { "epoch": 13.522245037645447, "grad_norm": 3.895601749420166, "learning_rate": 3.234246575342466e-07, "log_odds_chosen": 1.5269628763198853, "log_odds_ratio": -0.3572682738304138, "logits/chosen": 1.0116899013519287, "logits/rejected": 0.9512626528739929, "logps/chosen": -2.253746747970581, "logps/rejected": -3.6339845657348633, "loss": 0.6104, "nll_loss": 0.5746756196022034, "rewards/accuracies": 0.875, "rewards/chosen": -0.22537466883659363, "rewards/margins": 0.13802377879619598, "rewards/rejected": -0.3633984625339508, "step": 4939 }, { "epoch": 13.52498288843258, "grad_norm": 3.702498435974121, "learning_rate": 3.2328767123287673e-07, "log_odds_chosen": 2.7888779640197754, "log_odds_ratio": -0.12538392841815948, "logits/chosen": 1.1737922430038452, "logits/rejected": 1.1222552061080933, "logps/chosen": -1.5175126791000366, "logps/rejected": -4.065324306488037, "loss": 0.4595, "nll_loss": 0.44698405265808105, "rewards/accuracies": 1.0, "rewards/chosen": -0.15175125002861023, "rewards/margins": 0.25478118658065796, "rewards/rejected": -0.4065324664115906, "step": 4940 }, { "epoch": 13.527720739219713, "grad_norm": 4.3670501708984375, "learning_rate": 3.2315068493150683e-07, "log_odds_chosen": 2.675031900405884, "log_odds_ratio": -0.2601449489593506, "logits/chosen": 1.007909893989563, "logits/rejected": 0.9755016565322876, "logps/chosen": -1.5187935829162598, "logps/rejected": -3.912999391555786, "loss": 0.5116, "nll_loss": 0.48561614751815796, "rewards/accuracies": 0.875, "rewards/chosen": -0.15187935531139374, "rewards/margins": 0.2394205927848816, "rewards/rejected": -0.3912999629974365, "step": 4941 }, { "epoch": 13.530458590006845, "grad_norm": 3.8825478553771973, "learning_rate": 3.23013698630137e-07, "log_odds_chosen": 3.907137393951416, "log_odds_ratio": -0.20788992941379547, "logits/chosen": 1.13929283618927, "logits/rejected": 1.1803314685821533, "logps/chosen": -1.9241347312927246, "logps/rejected": -5.705060958862305, "loss": 0.6013, "nll_loss": 0.580536425113678, "rewards/accuracies": 0.875, "rewards/chosen": -0.19241346418857574, "rewards/margins": 0.37809258699417114, "rewards/rejected": -0.5705060958862305, "step": 4942 }, { "epoch": 13.533196440793978, "grad_norm": 3.4508864879608154, "learning_rate": 3.2287671232876713e-07, "log_odds_chosen": 2.0064852237701416, "log_odds_ratio": -0.2627655267715454, "logits/chosen": 1.0989413261413574, "logits/rejected": 1.084578037261963, "logps/chosen": -1.8563532829284668, "logps/rejected": -3.7215375900268555, "loss": 0.5053, "nll_loss": 0.47901830077171326, "rewards/accuracies": 0.875, "rewards/chosen": -0.18563534319400787, "rewards/margins": 0.18651844561100006, "rewards/rejected": -0.3721538186073303, "step": 4943 }, { "epoch": 13.535934291581109, "grad_norm": 8.283843994140625, "learning_rate": 3.2273972602739723e-07, "log_odds_chosen": 2.1657960414886475, "log_odds_ratio": -0.20133717358112335, "logits/chosen": 1.2593166828155518, "logits/rejected": 1.258813738822937, "logps/chosen": -2.4084019660949707, "logps/rejected": -4.470876693725586, "loss": 0.5984, "nll_loss": 0.5782390832901001, "rewards/accuracies": 1.0, "rewards/chosen": -0.24084021151065826, "rewards/margins": 0.2062474489212036, "rewards/rejected": -0.44708767533302307, "step": 4944 }, { "epoch": 13.538672142368242, "grad_norm": 3.5074751377105713, "learning_rate": 3.226027397260274e-07, "log_odds_chosen": 2.668250560760498, "log_odds_ratio": -0.2816997170448303, "logits/chosen": 1.07985520362854, "logits/rejected": 1.0619012117385864, "logps/chosen": -2.028193712234497, "logps/rejected": -4.5921783447265625, "loss": 0.5488, "nll_loss": 0.5205905437469482, "rewards/accuracies": 0.875, "rewards/chosen": -0.20281937718391418, "rewards/margins": 0.256398469209671, "rewards/rejected": -0.4592178761959076, "step": 4945 }, { "epoch": 13.541409993155373, "grad_norm": 3.3740406036376953, "learning_rate": 3.2246575342465753e-07, "log_odds_chosen": 3.533198356628418, "log_odds_ratio": -0.07083889842033386, "logits/chosen": 1.2055891752243042, "logits/rejected": 1.222993016242981, "logps/chosen": -1.7885957956314087, "logps/rejected": -5.116971969604492, "loss": 0.5034, "nll_loss": 0.49631351232528687, "rewards/accuracies": 1.0, "rewards/chosen": -0.17885959148406982, "rewards/margins": 0.3328375816345215, "rewards/rejected": -0.5116971731185913, "step": 4946 }, { "epoch": 13.544147843942506, "grad_norm": 6.523184299468994, "learning_rate": 3.2232876712328763e-07, "log_odds_chosen": 1.497552752494812, "log_odds_ratio": -0.3853527903556824, "logits/chosen": 1.2646197080612183, "logits/rejected": 1.278520941734314, "logps/chosen": -2.6494178771972656, "logps/rejected": -4.03444242477417, "loss": 0.5382, "nll_loss": 0.4996322691440582, "rewards/accuracies": 0.75, "rewards/chosen": -0.2649417817592621, "rewards/margins": 0.13850249350070953, "rewards/rejected": -0.4034442901611328, "step": 4947 }, { "epoch": 13.546885694729637, "grad_norm": 3.5271313190460205, "learning_rate": 3.221917808219178e-07, "log_odds_chosen": 3.807952642440796, "log_odds_ratio": -0.12001123279333115, "logits/chosen": 1.1742664575576782, "logits/rejected": 1.1445138454437256, "logps/chosen": -2.2400412559509277, "logps/rejected": -5.911655426025391, "loss": 0.7276, "nll_loss": 0.7156177759170532, "rewards/accuracies": 1.0, "rewards/chosen": -0.2240041196346283, "rewards/margins": 0.36716142296791077, "rewards/rejected": -0.5911655426025391, "step": 4948 }, { "epoch": 13.54962354551677, "grad_norm": 3.957058906555176, "learning_rate": 3.2205479452054794e-07, "log_odds_chosen": 1.8163102865219116, "log_odds_ratio": -0.2959328889846802, "logits/chosen": 0.9720577001571655, "logits/rejected": 0.9790791273117065, "logps/chosen": -1.9903514385223389, "logps/rejected": -3.675769805908203, "loss": 0.5443, "nll_loss": 0.5147445797920227, "rewards/accuracies": 0.875, "rewards/chosen": -0.1990351676940918, "rewards/margins": 0.1685418039560318, "rewards/rejected": -0.3675769567489624, "step": 4949 }, { "epoch": 13.552361396303901, "grad_norm": 6.447842597961426, "learning_rate": 3.219178082191781e-07, "log_odds_chosen": 1.9134691953659058, "log_odds_ratio": -0.28242728114128113, "logits/chosen": 0.8307014107704163, "logits/rejected": 0.7984382510185242, "logps/chosen": -1.8548961877822876, "logps/rejected": -3.5820553302764893, "loss": 0.5931, "nll_loss": 0.564811646938324, "rewards/accuracies": 0.875, "rewards/chosen": -0.18548962473869324, "rewards/margins": 0.1727159023284912, "rewards/rejected": -0.35820549726486206, "step": 4950 }, { "epoch": 13.555099247091034, "grad_norm": 3.6023852825164795, "learning_rate": 3.217808219178082e-07, "log_odds_chosen": 2.034914970397949, "log_odds_ratio": -0.1458245813846588, "logits/chosen": 1.1127763986587524, "logits/rejected": 1.0788004398345947, "logps/chosen": -1.7064279317855835, "logps/rejected": -3.4072415828704834, "loss": 0.5655, "nll_loss": 0.5509540438652039, "rewards/accuracies": 1.0, "rewards/chosen": -0.17064279317855835, "rewards/margins": 0.17008139193058014, "rewards/rejected": -0.3407241702079773, "step": 4951 }, { "epoch": 13.557837097878165, "grad_norm": 3.307952404022217, "learning_rate": 3.2164383561643834e-07, "log_odds_chosen": 5.649110794067383, "log_odds_ratio": -0.04435400664806366, "logits/chosen": 1.1662590503692627, "logits/rejected": 1.1179782152175903, "logps/chosen": -1.8604071140289307, "logps/rejected": -7.286473274230957, "loss": 0.5559, "nll_loss": 0.5514358282089233, "rewards/accuracies": 1.0, "rewards/chosen": -0.1860407143831253, "rewards/margins": 0.5426066517829895, "rewards/rejected": -0.7286472916603088, "step": 4952 }, { "epoch": 13.560574948665298, "grad_norm": 5.560242176055908, "learning_rate": 3.215068493150685e-07, "log_odds_chosen": 1.4419323205947876, "log_odds_ratio": -0.49412113428115845, "logits/chosen": 1.2757470607757568, "logits/rejected": 1.2159159183502197, "logps/chosen": -2.4972519874572754, "logps/rejected": -3.8348066806793213, "loss": 0.6251, "nll_loss": 0.5756572484970093, "rewards/accuracies": 0.875, "rewards/chosen": -0.24972519278526306, "rewards/margins": 0.13375547528266907, "rewards/rejected": -0.38348066806793213, "step": 4953 }, { "epoch": 13.56331279945243, "grad_norm": 4.265279769897461, "learning_rate": 3.213698630136986e-07, "log_odds_chosen": 2.8114013671875, "log_odds_ratio": -0.14793871343135834, "logits/chosen": 0.8729541301727295, "logits/rejected": 0.826433539390564, "logps/chosen": -2.0046467781066895, "logps/rejected": -4.60830020904541, "loss": 0.6921, "nll_loss": 0.6773325204849243, "rewards/accuracies": 1.0, "rewards/chosen": -0.20046469569206238, "rewards/margins": 0.2603653371334076, "rewards/rejected": -0.46083003282546997, "step": 4954 }, { "epoch": 13.566050650239562, "grad_norm": 3.6461193561553955, "learning_rate": 3.2123287671232874e-07, "log_odds_chosen": 1.606453537940979, "log_odds_ratio": -0.28573501110076904, "logits/chosen": 1.0334593057632446, "logits/rejected": 0.9952822923660278, "logps/chosen": -1.644810676574707, "logps/rejected": -3.0893592834472656, "loss": 0.5716, "nll_loss": 0.5429766774177551, "rewards/accuracies": 0.875, "rewards/chosen": -0.16448107361793518, "rewards/margins": 0.14445483684539795, "rewards/rejected": -0.30893591046333313, "step": 4955 }, { "epoch": 13.568788501026694, "grad_norm": 3.6041030883789062, "learning_rate": 3.210958904109589e-07, "log_odds_chosen": 2.081017255783081, "log_odds_ratio": -0.4075435400009155, "logits/chosen": 1.3367562294006348, "logits/rejected": 1.3317261934280396, "logps/chosen": -1.6508963108062744, "logps/rejected": -3.647412061691284, "loss": 0.5127, "nll_loss": 0.4719114899635315, "rewards/accuracies": 0.75, "rewards/chosen": -0.16508963704109192, "rewards/margins": 0.1996515989303589, "rewards/rejected": -0.3647412359714508, "step": 4956 }, { "epoch": 13.571526351813826, "grad_norm": 7.019189834594727, "learning_rate": 3.2095890410958905e-07, "log_odds_chosen": 0.9829870462417603, "log_odds_ratio": -0.5927249789237976, "logits/chosen": 0.9859118461608887, "logits/rejected": 0.9111393094062805, "logps/chosen": -2.2555665969848633, "logps/rejected": -3.081425905227661, "loss": 0.6087, "nll_loss": 0.5494382381439209, "rewards/accuracies": 0.75, "rewards/chosen": -0.22555668652057648, "rewards/margins": 0.08258592337369919, "rewards/rejected": -0.30814260244369507, "step": 4957 }, { "epoch": 13.574264202600958, "grad_norm": 4.3273773193359375, "learning_rate": 3.2082191780821915e-07, "log_odds_chosen": 3.6688103675842285, "log_odds_ratio": -0.1321253478527069, "logits/chosen": 1.1488579511642456, "logits/rejected": 1.2458209991455078, "logps/chosen": -2.475724935531616, "logps/rejected": -6.0049052238464355, "loss": 0.5441, "nll_loss": 0.5308814644813538, "rewards/accuracies": 1.0, "rewards/chosen": -0.24757251143455505, "rewards/margins": 0.35291802883148193, "rewards/rejected": -0.6004905104637146, "step": 4958 }, { "epoch": 13.57700205338809, "grad_norm": 5.640120983123779, "learning_rate": 3.206849315068493e-07, "log_odds_chosen": 1.5198646783828735, "log_odds_ratio": -0.4737328886985779, "logits/chosen": 0.9587962627410889, "logits/rejected": 0.9420844912528992, "logps/chosen": -2.2017111778259277, "logps/rejected": -3.619158983230591, "loss": 0.6153, "nll_loss": 0.5679529905319214, "rewards/accuracies": 0.75, "rewards/chosen": -0.22017113864421844, "rewards/margins": 0.1417447328567505, "rewards/rejected": -0.3619159162044525, "step": 4959 }, { "epoch": 13.579739904175222, "grad_norm": 4.236108779907227, "learning_rate": 3.2054794520547945e-07, "log_odds_chosen": 1.4843796491622925, "log_odds_ratio": -0.25466254353523254, "logits/chosen": 1.023053765296936, "logits/rejected": 0.9096107482910156, "logps/chosen": -1.375954508781433, "logps/rejected": -2.65372371673584, "loss": 0.5141, "nll_loss": 0.4886619448661804, "rewards/accuracies": 1.0, "rewards/chosen": -0.13759545981884003, "rewards/margins": 0.12777693569660187, "rewards/rejected": -0.2653723955154419, "step": 4960 }, { "epoch": 13.582477754962355, "grad_norm": 3.9438345432281494, "learning_rate": 3.2041095890410955e-07, "log_odds_chosen": 1.2845512628555298, "log_odds_ratio": -0.3516625761985779, "logits/chosen": 1.2775706052780151, "logits/rejected": 1.2628233432769775, "logps/chosen": -1.6861419677734375, "logps/rejected": -2.8128199577331543, "loss": 0.4919, "nll_loss": 0.45671790838241577, "rewards/accuracies": 0.875, "rewards/chosen": -0.1686142086982727, "rewards/margins": 0.11266781389713287, "rewards/rejected": -0.2812820076942444, "step": 4961 }, { "epoch": 13.585215605749486, "grad_norm": 3.5354487895965576, "learning_rate": 3.202739726027397e-07, "log_odds_chosen": 3.4211997985839844, "log_odds_ratio": -0.21528193354606628, "logits/chosen": 0.9139703512191772, "logits/rejected": 0.947481095790863, "logps/chosen": -2.434882164001465, "logps/rejected": -5.731199741363525, "loss": 0.5279, "nll_loss": 0.5064196586608887, "rewards/accuracies": 0.875, "rewards/chosen": -0.24348822236061096, "rewards/margins": 0.3296317458152771, "rewards/rejected": -0.5731199979782104, "step": 4962 }, { "epoch": 13.587953456536619, "grad_norm": 3.8170833587646484, "learning_rate": 3.2013698630136985e-07, "log_odds_chosen": 3.9426932334899902, "log_odds_ratio": -0.17253410816192627, "logits/chosen": 0.7466855049133301, "logits/rejected": 0.7843534350395203, "logps/chosen": -1.9896767139434814, "logps/rejected": -5.8010711669921875, "loss": 0.656, "nll_loss": 0.6387604475021362, "rewards/accuracies": 1.0, "rewards/chosen": -0.19896766543388367, "rewards/margins": 0.38113945722579956, "rewards/rejected": -0.5801071524620056, "step": 4963 }, { "epoch": 13.59069130732375, "grad_norm": 3.5847177505493164, "learning_rate": 3.2e-07, "log_odds_chosen": 5.569282531738281, "log_odds_ratio": -0.12081858515739441, "logits/chosen": 0.952146053314209, "logits/rejected": 0.9579117298126221, "logps/chosen": -1.85517418384552, "logps/rejected": -7.2402262687683105, "loss": 0.5899, "nll_loss": 0.5777683258056641, "rewards/accuracies": 1.0, "rewards/chosen": -0.18551743030548096, "rewards/margins": 0.5385051965713501, "rewards/rejected": -0.724022626876831, "step": 4964 }, { "epoch": 13.593429158110883, "grad_norm": 3.521864414215088, "learning_rate": 3.198630136986301e-07, "log_odds_chosen": 3.0644779205322266, "log_odds_ratio": -0.15552230179309845, "logits/chosen": 0.9251435995101929, "logits/rejected": 0.8871012926101685, "logps/chosen": -1.6331144571304321, "logps/rejected": -4.512370586395264, "loss": 0.5053, "nll_loss": 0.48976749181747437, "rewards/accuracies": 1.0, "rewards/chosen": -0.1633114516735077, "rewards/margins": 0.2879256010055542, "rewards/rejected": -0.4512370824813843, "step": 4965 }, { "epoch": 13.596167008898014, "grad_norm": 4.265573024749756, "learning_rate": 3.1972602739726026e-07, "log_odds_chosen": 2.0689644813537598, "log_odds_ratio": -0.281389057636261, "logits/chosen": 1.362623929977417, "logits/rejected": 1.3656214475631714, "logps/chosen": -2.23498272895813, "logps/rejected": -4.198075294494629, "loss": 0.6318, "nll_loss": 0.6036346554756165, "rewards/accuracies": 0.875, "rewards/chosen": -0.22349826991558075, "rewards/margins": 0.19630929827690125, "rewards/rejected": -0.4198075532913208, "step": 4966 }, { "epoch": 13.598904859685147, "grad_norm": 3.1372578144073486, "learning_rate": 3.195890410958904e-07, "log_odds_chosen": 2.244873285293579, "log_odds_ratio": -0.22266215085983276, "logits/chosen": 1.1147363185882568, "logits/rejected": 1.100637674331665, "logps/chosen": -1.7441844940185547, "logps/rejected": -3.8085227012634277, "loss": 0.5806, "nll_loss": 0.5583785176277161, "rewards/accuracies": 0.875, "rewards/chosen": -0.17441844940185547, "rewards/margins": 0.20643380284309387, "rewards/rejected": -0.38085228204727173, "step": 4967 }, { "epoch": 13.60164271047228, "grad_norm": 7.274834632873535, "learning_rate": 3.194520547945205e-07, "log_odds_chosen": 1.5379083156585693, "log_odds_ratio": -0.35890594124794006, "logits/chosen": 0.8987905383110046, "logits/rejected": 0.8816069960594177, "logps/chosen": -2.3099865913391113, "logps/rejected": -3.7512693405151367, "loss": 0.5971, "nll_loss": 0.5612029433250427, "rewards/accuracies": 0.75, "rewards/chosen": -0.23099865019321442, "rewards/margins": 0.14412829279899597, "rewards/rejected": -0.3751269578933716, "step": 4968 }, { "epoch": 13.604380561259411, "grad_norm": 6.6397600173950195, "learning_rate": 3.1931506849315066e-07, "log_odds_chosen": 2.5250349044799805, "log_odds_ratio": -0.4260476231575012, "logits/chosen": 1.1557666063308716, "logits/rejected": 1.1735368967056274, "logps/chosen": -2.944908380508423, "logps/rejected": -5.349639415740967, "loss": 0.652, "nll_loss": 0.6094170808792114, "rewards/accuracies": 0.75, "rewards/chosen": -0.29449084401130676, "rewards/margins": 0.24047315120697021, "rewards/rejected": -0.5349639654159546, "step": 4969 }, { "epoch": 13.607118412046544, "grad_norm": 4.195051670074463, "learning_rate": 3.191780821917808e-07, "log_odds_chosen": 1.6220061779022217, "log_odds_ratio": -0.24398702383041382, "logits/chosen": 0.9968194365501404, "logits/rejected": 0.9238003492355347, "logps/chosen": -1.5323604345321655, "logps/rejected": -2.9661190509796143, "loss": 0.4394, "nll_loss": 0.41501277685165405, "rewards/accuracies": 1.0, "rewards/chosen": -0.1532360464334488, "rewards/margins": 0.14337587356567383, "rewards/rejected": -0.2966119050979614, "step": 4970 }, { "epoch": 13.609856262833675, "grad_norm": 4.238082408905029, "learning_rate": 3.1904109589041097e-07, "log_odds_chosen": 2.774080276489258, "log_odds_ratio": -0.3089081943035126, "logits/chosen": 1.1276885271072388, "logits/rejected": 1.0033077001571655, "logps/chosen": -1.5503010749816895, "logps/rejected": -4.196197986602783, "loss": 0.5645, "nll_loss": 0.5336166620254517, "rewards/accuracies": 0.875, "rewards/chosen": -0.15503011643886566, "rewards/margins": 0.26458969712257385, "rewards/rejected": -0.4196198284626007, "step": 4971 }, { "epoch": 13.612594113620808, "grad_norm": 4.184213638305664, "learning_rate": 3.1890410958904106e-07, "log_odds_chosen": 2.6338796615600586, "log_odds_ratio": -0.169820636510849, "logits/chosen": 0.6942508816719055, "logits/rejected": 0.6342244744300842, "logps/chosen": -1.4217166900634766, "logps/rejected": -3.8159914016723633, "loss": 0.4511, "nll_loss": 0.43416494131088257, "rewards/accuracies": 1.0, "rewards/chosen": -0.1421716809272766, "rewards/margins": 0.23942746222019196, "rewards/rejected": -0.3815991282463074, "step": 4972 }, { "epoch": 13.61533196440794, "grad_norm": 3.548952341079712, "learning_rate": 3.187671232876712e-07, "log_odds_chosen": 3.7540268898010254, "log_odds_ratio": -0.11308665573596954, "logits/chosen": 1.1204140186309814, "logits/rejected": 1.111606478691101, "logps/chosen": -2.060136556625366, "logps/rejected": -5.648906230926514, "loss": 0.5659, "nll_loss": 0.554541826248169, "rewards/accuracies": 1.0, "rewards/chosen": -0.20601366460323334, "rewards/margins": 0.3588769733905792, "rewards/rejected": -0.5648906230926514, "step": 4973 }, { "epoch": 13.618069815195073, "grad_norm": 3.6088271141052246, "learning_rate": 3.1863013698630137e-07, "log_odds_chosen": 2.048034429550171, "log_odds_ratio": -0.29174232482910156, "logits/chosen": 1.0303577184677124, "logits/rejected": 1.0118521451950073, "logps/chosen": -1.5501837730407715, "logps/rejected": -3.438120126724243, "loss": 0.5149, "nll_loss": 0.48570188879966736, "rewards/accuracies": 0.875, "rewards/chosen": -0.1550183892250061, "rewards/margins": 0.18879365921020508, "rewards/rejected": -0.3438120484352112, "step": 4974 }, { "epoch": 13.620807665982204, "grad_norm": 3.4696364402770996, "learning_rate": 3.1849315068493147e-07, "log_odds_chosen": 4.352468490600586, "log_odds_ratio": -0.09774746000766754, "logits/chosen": 1.246725082397461, "logits/rejected": 1.2614381313323975, "logps/chosen": -2.390197277069092, "logps/rejected": -6.600797176361084, "loss": 0.5718, "nll_loss": 0.5619840025901794, "rewards/accuracies": 1.0, "rewards/chosen": -0.2390197217464447, "rewards/margins": 0.4210600256919861, "rewards/rejected": -0.6600797176361084, "step": 4975 }, { "epoch": 13.623545516769337, "grad_norm": 9.275130271911621, "learning_rate": 3.183561643835616e-07, "log_odds_chosen": 1.6135057210922241, "log_odds_ratio": -0.4985000491142273, "logits/chosen": 1.1513522863388062, "logits/rejected": 1.1335484981536865, "logps/chosen": -1.9177712202072144, "logps/rejected": -3.3955392837524414, "loss": 0.5568, "nll_loss": 0.5069681406021118, "rewards/accuracies": 0.875, "rewards/chosen": -0.19177711009979248, "rewards/margins": 0.14777681231498718, "rewards/rejected": -0.33955392241477966, "step": 4976 }, { "epoch": 13.626283367556468, "grad_norm": 4.393240928649902, "learning_rate": 3.1821917808219177e-07, "log_odds_chosen": 2.8636741638183594, "log_odds_ratio": -0.1558580994606018, "logits/chosen": 0.7941136360168457, "logits/rejected": 0.7027461528778076, "logps/chosen": -1.9351351261138916, "logps/rejected": -4.643872261047363, "loss": 0.5137, "nll_loss": 0.49807894229888916, "rewards/accuracies": 1.0, "rewards/chosen": -0.19351351261138916, "rewards/margins": 0.2708737850189209, "rewards/rejected": -0.46438726782798767, "step": 4977 }, { "epoch": 13.6290212183436, "grad_norm": 3.8955729007720947, "learning_rate": 3.180821917808219e-07, "log_odds_chosen": 2.63706636428833, "log_odds_ratio": -0.22348913550376892, "logits/chosen": 1.02308988571167, "logits/rejected": 1.0249308347702026, "logps/chosen": -2.1657469272613525, "logps/rejected": -4.68680477142334, "loss": 0.5696, "nll_loss": 0.5472162961959839, "rewards/accuracies": 1.0, "rewards/chosen": -0.21657468378543854, "rewards/margins": 0.25210580229759216, "rewards/rejected": -0.4686805307865143, "step": 4978 }, { "epoch": 13.631759069130732, "grad_norm": 5.834273815155029, "learning_rate": 3.17945205479452e-07, "log_odds_chosen": 1.7199673652648926, "log_odds_ratio": -0.28185415267944336, "logits/chosen": 1.0879530906677246, "logits/rejected": 1.0574145317077637, "logps/chosen": -1.6359999179840088, "logps/rejected": -3.119333267211914, "loss": 0.5161, "nll_loss": 0.48787590861320496, "rewards/accuracies": 1.0, "rewards/chosen": -0.16359999775886536, "rewards/margins": 0.148333340883255, "rewards/rejected": -0.31193333864212036, "step": 4979 }, { "epoch": 13.634496919917865, "grad_norm": 3.7732248306274414, "learning_rate": 3.1780821917808223e-07, "log_odds_chosen": 2.4182209968566895, "log_odds_ratio": -0.16817626357078552, "logits/chosen": 1.2184529304504395, "logits/rejected": 1.2333813905715942, "logps/chosen": -1.5478410720825195, "logps/rejected": -3.6389031410217285, "loss": 0.5187, "nll_loss": 0.501884937286377, "rewards/accuracies": 1.0, "rewards/chosen": -0.15478411316871643, "rewards/margins": 0.2091062217950821, "rewards/rejected": -0.36389032006263733, "step": 4980 }, { "epoch": 13.637234770704996, "grad_norm": 4.554165363311768, "learning_rate": 3.1767123287671233e-07, "log_odds_chosen": 1.4998502731323242, "log_odds_ratio": -0.8588830232620239, "logits/chosen": 0.9419537782669067, "logits/rejected": 0.9555597305297852, "logps/chosen": -2.5538456439971924, "logps/rejected": -3.9539756774902344, "loss": 0.7134, "nll_loss": 0.6275054216384888, "rewards/accuracies": 0.625, "rewards/chosen": -0.25538453459739685, "rewards/margins": 0.14001300930976868, "rewards/rejected": -0.3953975439071655, "step": 4981 }, { "epoch": 13.639972621492129, "grad_norm": 4.269142150878906, "learning_rate": 3.175342465753424e-07, "log_odds_chosen": 3.1270086765289307, "log_odds_ratio": -0.2146090269088745, "logits/chosen": 1.0926756858825684, "logits/rejected": 1.0769833326339722, "logps/chosen": -1.7774004936218262, "logps/rejected": -4.654762268066406, "loss": 0.5801, "nll_loss": 0.5586163401603699, "rewards/accuracies": 1.0, "rewards/chosen": -0.17774003744125366, "rewards/margins": 0.2877362370491028, "rewards/rejected": -0.46547627449035645, "step": 4982 }, { "epoch": 13.64271047227926, "grad_norm": 4.931666374206543, "learning_rate": 3.173972602739726e-07, "log_odds_chosen": 1.6975767612457275, "log_odds_ratio": -0.3673028349876404, "logits/chosen": 1.1435127258300781, "logits/rejected": 1.2202256917953491, "logps/chosen": -2.4349331855773926, "logps/rejected": -4.037069320678711, "loss": 0.5306, "nll_loss": 0.4938567280769348, "rewards/accuracies": 0.75, "rewards/chosen": -0.24349331855773926, "rewards/margins": 0.1602136194705963, "rewards/rejected": -0.4037069082260132, "step": 4983 }, { "epoch": 13.645448323066393, "grad_norm": 3.216435670852661, "learning_rate": 3.1726027397260273e-07, "log_odds_chosen": 3.1652235984802246, "log_odds_ratio": -0.15633231401443481, "logits/chosen": 1.1127878427505493, "logits/rejected": 1.087122917175293, "logps/chosen": -1.8819518089294434, "logps/rejected": -4.804610729217529, "loss": 0.5548, "nll_loss": 0.5391919612884521, "rewards/accuracies": 1.0, "rewards/chosen": -0.18819519877433777, "rewards/margins": 0.2922658920288086, "rewards/rejected": -0.480461061000824, "step": 4984 }, { "epoch": 13.648186173853524, "grad_norm": 10.210927963256836, "learning_rate": 3.171232876712329e-07, "log_odds_chosen": 1.9990814924240112, "log_odds_ratio": -0.37764135003089905, "logits/chosen": 1.4128650426864624, "logits/rejected": 1.4681190252304077, "logps/chosen": -3.1251583099365234, "logps/rejected": -5.0043230056762695, "loss": 0.6972, "nll_loss": 0.6594341397285461, "rewards/accuracies": 0.75, "rewards/chosen": -0.31251582503318787, "rewards/margins": 0.18791645765304565, "rewards/rejected": -0.5004323124885559, "step": 4985 }, { "epoch": 13.650924024640657, "grad_norm": 3.8335602283477783, "learning_rate": 3.16986301369863e-07, "log_odds_chosen": 2.270033121109009, "log_odds_ratio": -0.27635371685028076, "logits/chosen": 0.85049968957901, "logits/rejected": 0.7843299508094788, "logps/chosen": -1.6697990894317627, "logps/rejected": -3.7928454875946045, "loss": 0.5066, "nll_loss": 0.4789443612098694, "rewards/accuracies": 0.875, "rewards/chosen": -0.16697992384433746, "rewards/margins": 0.21230465173721313, "rewards/rejected": -0.3792845606803894, "step": 4986 }, { "epoch": 13.653661875427789, "grad_norm": 3.724493980407715, "learning_rate": 3.168493150684932e-07, "log_odds_chosen": 2.753955364227295, "log_odds_ratio": -0.24047544598579407, "logits/chosen": 1.339505672454834, "logits/rejected": 1.336287260055542, "logps/chosen": -1.8874090909957886, "logps/rejected": -4.468753337860107, "loss": 0.509, "nll_loss": 0.4849301278591156, "rewards/accuracies": 1.0, "rewards/chosen": -0.18874090909957886, "rewards/margins": 0.2581344544887543, "rewards/rejected": -0.44687536358833313, "step": 4987 }, { "epoch": 13.656399726214921, "grad_norm": 3.7341582775115967, "learning_rate": 3.167123287671233e-07, "log_odds_chosen": 3.8767786026000977, "log_odds_ratio": -0.22269463539123535, "logits/chosen": 1.010440707206726, "logits/rejected": 0.9157334566116333, "logps/chosen": -2.022113561630249, "logps/rejected": -5.735837459564209, "loss": 0.5102, "nll_loss": 0.48790472745895386, "rewards/accuracies": 0.875, "rewards/chosen": -0.20221135020256042, "rewards/margins": 0.37137240171432495, "rewards/rejected": -0.573583722114563, "step": 4988 }, { "epoch": 13.659137577002053, "grad_norm": 3.6992952823638916, "learning_rate": 3.165753424657534e-07, "log_odds_chosen": 1.9836069345474243, "log_odds_ratio": -0.21432116627693176, "logits/chosen": 0.8261976838111877, "logits/rejected": 0.8271253705024719, "logps/chosen": -1.7869517803192139, "logps/rejected": -3.5537824630737305, "loss": 0.4803, "nll_loss": 0.4588219225406647, "rewards/accuracies": 1.0, "rewards/chosen": -0.1786951869726181, "rewards/margins": 0.17668306827545166, "rewards/rejected": -0.35537827014923096, "step": 4989 }, { "epoch": 13.661875427789186, "grad_norm": 3.2637133598327637, "learning_rate": 3.1643835616438354e-07, "log_odds_chosen": 2.9940552711486816, "log_odds_ratio": -0.11318683624267578, "logits/chosen": 0.9601894021034241, "logits/rejected": 0.9628187417984009, "logps/chosen": -1.8145712614059448, "logps/rejected": -4.550487995147705, "loss": 0.534, "nll_loss": 0.522659420967102, "rewards/accuracies": 1.0, "rewards/chosen": -0.18145713210105896, "rewards/margins": 0.27359166741371155, "rewards/rejected": -0.4550487995147705, "step": 4990 }, { "epoch": 13.664613278576317, "grad_norm": 4.306469440460205, "learning_rate": 3.163013698630137e-07, "log_odds_chosen": 2.120431900024414, "log_odds_ratio": -0.4099932909011841, "logits/chosen": 1.0439211130142212, "logits/rejected": 1.1056665182113647, "logps/chosen": -2.0273287296295166, "logps/rejected": -4.034136772155762, "loss": 0.5687, "nll_loss": 0.5276763439178467, "rewards/accuracies": 0.75, "rewards/chosen": -0.2027328908443451, "rewards/margins": 0.20068082213401794, "rewards/rejected": -0.40341371297836304, "step": 4991 }, { "epoch": 13.66735112936345, "grad_norm": 5.919382095336914, "learning_rate": 3.1616438356164384e-07, "log_odds_chosen": 2.7344555854797363, "log_odds_ratio": -0.10923793911933899, "logits/chosen": 1.023489236831665, "logits/rejected": 0.9922165870666504, "logps/chosen": -1.7319279909133911, "logps/rejected": -4.2647809982299805, "loss": 0.5438, "nll_loss": 0.5329058766365051, "rewards/accuracies": 1.0, "rewards/chosen": -0.1731927990913391, "rewards/margins": 0.25328534841537476, "rewards/rejected": -0.4264781177043915, "step": 4992 }, { "epoch": 13.670088980150581, "grad_norm": 3.752317190170288, "learning_rate": 3.1602739726027394e-07, "log_odds_chosen": 1.3523350954055786, "log_odds_ratio": -0.39371973276138306, "logits/chosen": 1.1944979429244995, "logits/rejected": 1.1768527030944824, "logps/chosen": -1.476567029953003, "logps/rejected": -2.7019705772399902, "loss": 0.4422, "nll_loss": 0.4027871787548065, "rewards/accuracies": 0.75, "rewards/chosen": -0.14765670895576477, "rewards/margins": 0.12254033982753754, "rewards/rejected": -0.2701970338821411, "step": 4993 }, { "epoch": 13.672826830937714, "grad_norm": 3.9213478565216064, "learning_rate": 3.158904109589041e-07, "log_odds_chosen": 4.35254430770874, "log_odds_ratio": -0.1177646666765213, "logits/chosen": 1.0682789087295532, "logits/rejected": 1.0650510787963867, "logps/chosen": -1.744471549987793, "logps/rejected": -5.830560684204102, "loss": 0.5352, "nll_loss": 0.523389458656311, "rewards/accuracies": 1.0, "rewards/chosen": -0.174447163939476, "rewards/margins": 0.40860894322395325, "rewards/rejected": -0.5830560922622681, "step": 4994 }, { "epoch": 13.675564681724847, "grad_norm": 3.3683645725250244, "learning_rate": 3.1575342465753424e-07, "log_odds_chosen": 2.2274835109710693, "log_odds_ratio": -0.23510709404945374, "logits/chosen": 1.200386643409729, "logits/rejected": 1.153189778327942, "logps/chosen": -2.292935371398926, "logps/rejected": -4.382928848266602, "loss": 0.5656, "nll_loss": 0.5421013832092285, "rewards/accuracies": 1.0, "rewards/chosen": -0.22929352521896362, "rewards/margins": 0.208999365568161, "rewards/rejected": -0.438292920589447, "step": 4995 }, { "epoch": 13.678302532511978, "grad_norm": 3.3240413665771484, "learning_rate": 3.1561643835616434e-07, "log_odds_chosen": 3.829653263092041, "log_odds_ratio": -0.207547128200531, "logits/chosen": 1.2499631643295288, "logits/rejected": 1.249308466911316, "logps/chosen": -2.319725275039673, "logps/rejected": -6.060863494873047, "loss": 0.6184, "nll_loss": 0.5976199507713318, "rewards/accuracies": 1.0, "rewards/chosen": -0.23197254538536072, "rewards/margins": 0.3741137981414795, "rewards/rejected": -0.6060863137245178, "step": 4996 }, { "epoch": 13.681040383299111, "grad_norm": 4.72870397567749, "learning_rate": 3.154794520547945e-07, "log_odds_chosen": 2.1240179538726807, "log_odds_ratio": -0.5171972513198853, "logits/chosen": 0.958305299282074, "logits/rejected": 0.9134628772735596, "logps/chosen": -2.021362781524658, "logps/rejected": -4.090001106262207, "loss": 0.5924, "nll_loss": 0.5406507253646851, "rewards/accuracies": 0.75, "rewards/chosen": -0.20213627815246582, "rewards/margins": 0.2068638652563095, "rewards/rejected": -0.40900012850761414, "step": 4997 }, { "epoch": 13.683778234086242, "grad_norm": 3.599925994873047, "learning_rate": 3.1534246575342465e-07, "log_odds_chosen": 3.090158224105835, "log_odds_ratio": -0.1805761456489563, "logits/chosen": 0.8288210034370422, "logits/rejected": 0.8439527750015259, "logps/chosen": -1.8727916479110718, "logps/rejected": -4.774937152862549, "loss": 0.5469, "nll_loss": 0.528828501701355, "rewards/accuracies": 0.875, "rewards/chosen": -0.18727917969226837, "rewards/margins": 0.29021456837654114, "rewards/rejected": -0.4774937629699707, "step": 4998 }, { "epoch": 13.686516084873375, "grad_norm": 4.480846881866455, "learning_rate": 3.152054794520548e-07, "log_odds_chosen": 2.2275071144104004, "log_odds_ratio": -0.21403126418590546, "logits/chosen": 1.0435625314712524, "logits/rejected": 1.0277433395385742, "logps/chosen": -2.4043619632720947, "logps/rejected": -4.496301651000977, "loss": 0.5718, "nll_loss": 0.5504239201545715, "rewards/accuracies": 0.875, "rewards/chosen": -0.24043619632720947, "rewards/margins": 0.20919400453567505, "rewards/rejected": -0.4496302008628845, "step": 4999 }, { "epoch": 13.689253935660506, "grad_norm": 4.0873942375183105, "learning_rate": 3.150684931506849e-07, "log_odds_chosen": 1.6926968097686768, "log_odds_ratio": -0.4485585689544678, "logits/chosen": 0.9530651569366455, "logits/rejected": 0.9716962575912476, "logps/chosen": -1.9870855808258057, "logps/rejected": -3.516979694366455, "loss": 0.5455, "nll_loss": 0.5006426572799683, "rewards/accuracies": 0.75, "rewards/chosen": -0.19870856404304504, "rewards/margins": 0.15298941731452942, "rewards/rejected": -0.35169798135757446, "step": 5000 }, { "epoch": 13.69199178644764, "grad_norm": 3.7853429317474365, "learning_rate": 3.1493150684931505e-07, "log_odds_chosen": 3.3396756649017334, "log_odds_ratio": -0.15951034426689148, "logits/chosen": 1.181900978088379, "logits/rejected": 1.1335870027542114, "logps/chosen": -1.4096753597259521, "logps/rejected": -4.43880033493042, "loss": 0.484, "nll_loss": 0.4680682122707367, "rewards/accuracies": 1.0, "rewards/chosen": -0.14096754789352417, "rewards/margins": 0.30291247367858887, "rewards/rejected": -0.4438800513744354, "step": 5001 }, { "epoch": 13.69472963723477, "grad_norm": 3.680742025375366, "learning_rate": 3.147945205479452e-07, "log_odds_chosen": 2.4440810680389404, "log_odds_ratio": -0.27395543456077576, "logits/chosen": 1.136258840560913, "logits/rejected": 1.099812388420105, "logps/chosen": -1.8213553428649902, "logps/rejected": -4.137084007263184, "loss": 0.5984, "nll_loss": 0.570977509021759, "rewards/accuracies": 1.0, "rewards/chosen": -0.18213552236557007, "rewards/margins": 0.23157286643981934, "rewards/rejected": -0.4137083888053894, "step": 5002 }, { "epoch": 13.697467488021903, "grad_norm": 3.65921688079834, "learning_rate": 3.146575342465753e-07, "log_odds_chosen": 2.619398593902588, "log_odds_ratio": -0.24683363735675812, "logits/chosen": 1.026732325553894, "logits/rejected": 1.011797547340393, "logps/chosen": -1.9710299968719482, "logps/rejected": -4.470151901245117, "loss": 0.4922, "nll_loss": 0.46748048067092896, "rewards/accuracies": 0.875, "rewards/chosen": -0.19710299372673035, "rewards/margins": 0.24991217255592346, "rewards/rejected": -0.4470151662826538, "step": 5003 }, { "epoch": 13.700205338809035, "grad_norm": 3.775801658630371, "learning_rate": 3.1452054794520545e-07, "log_odds_chosen": 3.3597817420959473, "log_odds_ratio": -0.17349310219287872, "logits/chosen": 0.8721355199813843, "logits/rejected": 0.761657178401947, "logps/chosen": -1.2738956212997437, "logps/rejected": -4.375210285186768, "loss": 0.5299, "nll_loss": 0.5125941038131714, "rewards/accuracies": 1.0, "rewards/chosen": -0.1273895651102066, "rewards/margins": 0.3101314604282379, "rewards/rejected": -0.4375210404396057, "step": 5004 }, { "epoch": 13.702943189596168, "grad_norm": 4.018764019012451, "learning_rate": 3.143835616438356e-07, "log_odds_chosen": 1.752666711807251, "log_odds_ratio": -0.262677401304245, "logits/chosen": 1.1000688076019287, "logits/rejected": 1.0458250045776367, "logps/chosen": -2.40286922454834, "logps/rejected": -4.0833868980407715, "loss": 0.6186, "nll_loss": 0.5923335552215576, "rewards/accuracies": 0.875, "rewards/chosen": -0.2402869015932083, "rewards/margins": 0.1680518090724945, "rewards/rejected": -0.4083386957645416, "step": 5005 }, { "epoch": 13.705681040383299, "grad_norm": 3.8258769512176514, "learning_rate": 3.1424657534246576e-07, "log_odds_chosen": 1.0052759647369385, "log_odds_ratio": -0.4285121560096741, "logits/chosen": 1.203895926475525, "logits/rejected": 1.2397210597991943, "logps/chosen": -2.1696152687072754, "logps/rejected": -3.1000351905822754, "loss": 0.6281, "nll_loss": 0.5852824449539185, "rewards/accuracies": 0.875, "rewards/chosen": -0.2169615477323532, "rewards/margins": 0.09304197877645493, "rewards/rejected": -0.3100035488605499, "step": 5006 }, { "epoch": 13.708418891170432, "grad_norm": 12.033249855041504, "learning_rate": 3.1410958904109586e-07, "log_odds_chosen": 2.821641206741333, "log_odds_ratio": -0.6133910417556763, "logits/chosen": 1.2512907981872559, "logits/rejected": 1.139405608177185, "logps/chosen": -2.479598045349121, "logps/rejected": -5.105170726776123, "loss": 0.6353, "nll_loss": 0.5739963054656982, "rewards/accuracies": 0.75, "rewards/chosen": -0.24795982241630554, "rewards/margins": 0.2625572383403778, "rewards/rejected": -0.5105170607566833, "step": 5007 }, { "epoch": 13.711156741957563, "grad_norm": 3.4418208599090576, "learning_rate": 3.13972602739726e-07, "log_odds_chosen": 2.8480467796325684, "log_odds_ratio": -0.18201899528503418, "logits/chosen": 0.8883114457130432, "logits/rejected": 0.7829548120498657, "logps/chosen": -1.5603418350219727, "logps/rejected": -4.1984663009643555, "loss": 0.5384, "nll_loss": 0.520167350769043, "rewards/accuracies": 1.0, "rewards/chosen": -0.1560341715812683, "rewards/margins": 0.26381248235702515, "rewards/rejected": -0.41984665393829346, "step": 5008 }, { "epoch": 13.713894592744696, "grad_norm": 6.287350177764893, "learning_rate": 3.1383561643835616e-07, "log_odds_chosen": 2.6749930381774902, "log_odds_ratio": -0.32497987151145935, "logits/chosen": 1.1894992589950562, "logits/rejected": 1.0948859453201294, "logps/chosen": -2.3778700828552246, "logps/rejected": -4.868297576904297, "loss": 0.5786, "nll_loss": 0.5460698008537292, "rewards/accuracies": 0.875, "rewards/chosen": -0.23778699338436127, "rewards/margins": 0.2490427941083908, "rewards/rejected": -0.4868297576904297, "step": 5009 }, { "epoch": 13.716632443531827, "grad_norm": 5.946408748626709, "learning_rate": 3.1369863013698626e-07, "log_odds_chosen": 3.965733766555786, "log_odds_ratio": -0.38400065898895264, "logits/chosen": 1.0849922895431519, "logits/rejected": 1.0319904088974, "logps/chosen": -1.629834532737732, "logps/rejected": -5.191455364227295, "loss": 0.5378, "nll_loss": 0.4994465112686157, "rewards/accuracies": 0.875, "rewards/chosen": -0.1629834771156311, "rewards/margins": 0.35616201162338257, "rewards/rejected": -0.5191454887390137, "step": 5010 }, { "epoch": 13.71937029431896, "grad_norm": 3.5366485118865967, "learning_rate": 3.1356164383561647e-07, "log_odds_chosen": 2.4393575191497803, "log_odds_ratio": -0.19968900084495544, "logits/chosen": 1.171726107597351, "logits/rejected": 1.147563099861145, "logps/chosen": -1.4646103382110596, "logps/rejected": -3.67783260345459, "loss": 0.529, "nll_loss": 0.5090717673301697, "rewards/accuracies": 1.0, "rewards/chosen": -0.14646103978157043, "rewards/margins": 0.2213222086429596, "rewards/rejected": -0.36778324842453003, "step": 5011 }, { "epoch": 13.722108145106091, "grad_norm": 3.6977689266204834, "learning_rate": 3.1342465753424657e-07, "log_odds_chosen": 2.1241345405578613, "log_odds_ratio": -0.22800321877002716, "logits/chosen": 1.106926441192627, "logits/rejected": 1.093923807144165, "logps/chosen": -2.069528341293335, "logps/rejected": -4.078488349914551, "loss": 0.5652, "nll_loss": 0.5424458384513855, "rewards/accuracies": 1.0, "rewards/chosen": -0.20695282518863678, "rewards/margins": 0.2008959949016571, "rewards/rejected": -0.40784886479377747, "step": 5012 }, { "epoch": 13.724845995893224, "grad_norm": 4.21106481552124, "learning_rate": 3.132876712328767e-07, "log_odds_chosen": 2.1903493404388428, "log_odds_ratio": -0.20695364475250244, "logits/chosen": 1.1668450832366943, "logits/rejected": 1.1750919818878174, "logps/chosen": -1.502448558807373, "logps/rejected": -3.452096939086914, "loss": 0.69, "nll_loss": 0.6693106889724731, "rewards/accuracies": 1.0, "rewards/chosen": -0.1502448469400406, "rewards/margins": 0.19496482610702515, "rewards/rejected": -0.34520968794822693, "step": 5013 }, { "epoch": 13.727583846680355, "grad_norm": 4.252901554107666, "learning_rate": 3.131506849315068e-07, "log_odds_chosen": 1.6939139366149902, "log_odds_ratio": -0.30956530570983887, "logits/chosen": 1.1460871696472168, "logits/rejected": 1.1462045907974243, "logps/chosen": -1.580923318862915, "logps/rejected": -3.1440072059631348, "loss": 0.4368, "nll_loss": 0.4058777093887329, "rewards/accuracies": 1.0, "rewards/chosen": -0.15809234976768494, "rewards/margins": 0.1563083827495575, "rewards/rejected": -0.31440073251724243, "step": 5014 }, { "epoch": 13.730321697467488, "grad_norm": 3.712542772293091, "learning_rate": 3.1301369863013697e-07, "log_odds_chosen": 2.0446255207061768, "log_odds_ratio": -0.19127348065376282, "logits/chosen": 0.9367285966873169, "logits/rejected": 0.9440494775772095, "logps/chosen": -2.7889463901519775, "logps/rejected": -4.676541328430176, "loss": 0.6561, "nll_loss": 0.636930525302887, "rewards/accuracies": 0.875, "rewards/chosen": -0.2788946330547333, "rewards/margins": 0.18875952064990997, "rewards/rejected": -0.46765416860580444, "step": 5015 }, { "epoch": 13.73305954825462, "grad_norm": 3.64927077293396, "learning_rate": 3.128767123287671e-07, "log_odds_chosen": 3.1330044269561768, "log_odds_ratio": -0.3181896507740021, "logits/chosen": 1.1207234859466553, "logits/rejected": 1.0798437595367432, "logps/chosen": -2.245429039001465, "logps/rejected": -5.3193278312683105, "loss": 0.6244, "nll_loss": 0.5925855040550232, "rewards/accuracies": 0.875, "rewards/chosen": -0.22454291582107544, "rewards/margins": 0.30738985538482666, "rewards/rejected": -0.5319327712059021, "step": 5016 }, { "epoch": 13.735797399041752, "grad_norm": 3.655653476715088, "learning_rate": 3.127397260273972e-07, "log_odds_chosen": 3.095494270324707, "log_odds_ratio": -0.2552565634250641, "logits/chosen": 1.1867234706878662, "logits/rejected": 1.1817909479141235, "logps/chosen": -2.071636915206909, "logps/rejected": -5.010353088378906, "loss": 0.5179, "nll_loss": 0.4923553466796875, "rewards/accuracies": 1.0, "rewards/chosen": -0.20716366171836853, "rewards/margins": 0.29387167096138, "rewards/rejected": -0.5010353326797485, "step": 5017 }, { "epoch": 13.738535249828884, "grad_norm": 3.1876912117004395, "learning_rate": 3.126027397260274e-07, "log_odds_chosen": 2.699110507965088, "log_odds_ratio": -0.1319306194782257, "logits/chosen": 1.2988770008087158, "logits/rejected": 1.3191275596618652, "logps/chosen": -1.9305951595306396, "logps/rejected": -4.464527606964111, "loss": 0.5104, "nll_loss": 0.4972427487373352, "rewards/accuracies": 1.0, "rewards/chosen": -0.193059504032135, "rewards/margins": 0.2533932328224182, "rewards/rejected": -0.4464527368545532, "step": 5018 }, { "epoch": 13.741273100616016, "grad_norm": 4.332408428192139, "learning_rate": 3.124657534246575e-07, "log_odds_chosen": 1.2942118644714355, "log_odds_ratio": -0.39360862970352173, "logits/chosen": 0.8335487842559814, "logits/rejected": 0.8284832239151001, "logps/chosen": -1.8561146259307861, "logps/rejected": -3.0028738975524902, "loss": 0.5209, "nll_loss": 0.48149919509887695, "rewards/accuracies": 0.875, "rewards/chosen": -0.18561145663261414, "rewards/margins": 0.11467593908309937, "rewards/rejected": -0.3002873957157135, "step": 5019 }, { "epoch": 13.744010951403148, "grad_norm": 7.299999713897705, "learning_rate": 3.123287671232877e-07, "log_odds_chosen": 3.2822442054748535, "log_odds_ratio": -0.21589776873588562, "logits/chosen": 1.2036001682281494, "logits/rejected": 1.1691346168518066, "logps/chosen": -1.9691495895385742, "logps/rejected": -5.076486587524414, "loss": 0.5244, "nll_loss": 0.502856433391571, "rewards/accuracies": 0.875, "rewards/chosen": -0.19691495597362518, "rewards/margins": 0.31073373556137085, "rewards/rejected": -0.5076486468315125, "step": 5020 }, { "epoch": 13.74674880219028, "grad_norm": 3.3551368713378906, "learning_rate": 3.121917808219178e-07, "log_odds_chosen": 2.91937255859375, "log_odds_ratio": -0.16465236246585846, "logits/chosen": 1.1903722286224365, "logits/rejected": 1.186284065246582, "logps/chosen": -1.6203950643539429, "logps/rejected": -4.077313423156738, "loss": 0.4382, "nll_loss": 0.4216916561126709, "rewards/accuracies": 1.0, "rewards/chosen": -0.16203950345516205, "rewards/margins": 0.24569186568260193, "rewards/rejected": -0.4077313542366028, "step": 5021 }, { "epoch": 13.749486652977414, "grad_norm": 6.069489479064941, "learning_rate": 3.1205479452054793e-07, "log_odds_chosen": 3.674743413925171, "log_odds_ratio": -0.29801616072654724, "logits/chosen": 1.1894375085830688, "logits/rejected": 1.1409772634506226, "logps/chosen": -2.393664598464966, "logps/rejected": -5.93607759475708, "loss": 0.6217, "nll_loss": 0.5918591022491455, "rewards/accuracies": 0.875, "rewards/chosen": -0.23936647176742554, "rewards/margins": 0.3542413115501404, "rewards/rejected": -0.5936077833175659, "step": 5022 }, { "epoch": 13.752224503764545, "grad_norm": 4.384952068328857, "learning_rate": 3.119178082191781e-07, "log_odds_chosen": 3.647303581237793, "log_odds_ratio": -0.24886956810951233, "logits/chosen": 0.9325309991836548, "logits/rejected": 0.8677651286125183, "logps/chosen": -2.396024703979492, "logps/rejected": -5.920401096343994, "loss": 0.5732, "nll_loss": 0.5482786893844604, "rewards/accuracies": 1.0, "rewards/chosen": -0.2396024614572525, "rewards/margins": 0.3524376451969147, "rewards/rejected": -0.5920400619506836, "step": 5023 }, { "epoch": 13.754962354551678, "grad_norm": 3.4936277866363525, "learning_rate": 3.117808219178082e-07, "log_odds_chosen": 1.5703295469284058, "log_odds_ratio": -0.27601587772369385, "logits/chosen": 1.2343688011169434, "logits/rejected": 1.0840178728103638, "logps/chosen": -1.3777544498443604, "logps/rejected": -2.733259916305542, "loss": 0.4499, "nll_loss": 0.4222501218318939, "rewards/accuracies": 1.0, "rewards/chosen": -0.1377754509449005, "rewards/margins": 0.13555054366588593, "rewards/rejected": -0.27332600951194763, "step": 5024 }, { "epoch": 13.757700205338809, "grad_norm": 3.295732021331787, "learning_rate": 3.116438356164384e-07, "log_odds_chosen": 1.7115463018417358, "log_odds_ratio": -0.24536344408988953, "logits/chosen": 1.135593295097351, "logits/rejected": 1.0770444869995117, "logps/chosen": -1.4738426208496094, "logps/rejected": -2.9801599979400635, "loss": 0.5288, "nll_loss": 0.504278838634491, "rewards/accuracies": 1.0, "rewards/chosen": -0.14738428592681885, "rewards/margins": 0.15063174068927765, "rewards/rejected": -0.2980160117149353, "step": 5025 }, { "epoch": 13.760438056125942, "grad_norm": 3.732287883758545, "learning_rate": 3.115068493150685e-07, "log_odds_chosen": 2.0852181911468506, "log_odds_ratio": -0.18779250979423523, "logits/chosen": 0.8126502633094788, "logits/rejected": 0.733992338180542, "logps/chosen": -1.311356544494629, "logps/rejected": -3.095397472381592, "loss": 0.4497, "nll_loss": 0.43090206384658813, "rewards/accuracies": 1.0, "rewards/chosen": -0.13113565742969513, "rewards/margins": 0.1784040927886963, "rewards/rejected": -0.3095397353172302, "step": 5026 }, { "epoch": 13.763175906913073, "grad_norm": 4.61279296875, "learning_rate": 3.1136986301369863e-07, "log_odds_chosen": 2.6704444885253906, "log_odds_ratio": -0.23546500504016876, "logits/chosen": 0.8516989350318909, "logits/rejected": 0.9158334136009216, "logps/chosen": -1.9263399839401245, "logps/rejected": -4.481435775756836, "loss": 0.5706, "nll_loss": 0.5470816493034363, "rewards/accuracies": 0.875, "rewards/chosen": -0.1926340013742447, "rewards/margins": 0.25550955533981323, "rewards/rejected": -0.44814354181289673, "step": 5027 }, { "epoch": 13.765913757700206, "grad_norm": 3.556990623474121, "learning_rate": 3.1123287671232873e-07, "log_odds_chosen": 4.1312642097473145, "log_odds_ratio": -0.2270495593547821, "logits/chosen": 1.0662931203842163, "logits/rejected": 1.11104154586792, "logps/chosen": -2.2284059524536133, "logps/rejected": -6.292586326599121, "loss": 0.5593, "nll_loss": 0.5365810394287109, "rewards/accuracies": 0.875, "rewards/chosen": -0.2228405922651291, "rewards/margins": 0.4064180850982666, "rewards/rejected": -0.6292586922645569, "step": 5028 }, { "epoch": 13.768651608487337, "grad_norm": 4.045512676239014, "learning_rate": 3.110958904109589e-07, "log_odds_chosen": 3.573410987854004, "log_odds_ratio": -0.22309699654579163, "logits/chosen": 1.0506504774093628, "logits/rejected": 1.0362169742584229, "logps/chosen": -2.4541053771972656, "logps/rejected": -5.916681289672852, "loss": 0.7728, "nll_loss": 0.7504432797431946, "rewards/accuracies": 1.0, "rewards/chosen": -0.24541054666042328, "rewards/margins": 0.3462575674057007, "rewards/rejected": -0.5916681289672852, "step": 5029 }, { "epoch": 13.77138945927447, "grad_norm": 3.713721752166748, "learning_rate": 3.1095890410958904e-07, "log_odds_chosen": 3.054198741912842, "log_odds_ratio": -0.14350584149360657, "logits/chosen": 1.3120429515838623, "logits/rejected": 1.296015739440918, "logps/chosen": -2.2474169731140137, "logps/rejected": -5.14739465713501, "loss": 0.531, "nll_loss": 0.5166178941726685, "rewards/accuracies": 1.0, "rewards/chosen": -0.22474169731140137, "rewards/margins": 0.28999781608581543, "rewards/rejected": -0.514739453792572, "step": 5030 }, { "epoch": 13.774127310061601, "grad_norm": 4.085696697235107, "learning_rate": 3.1082191780821914e-07, "log_odds_chosen": 2.4225258827209473, "log_odds_ratio": -0.3289645314216614, "logits/chosen": 1.1645046472549438, "logits/rejected": 1.2022993564605713, "logps/chosen": -1.8726294040679932, "logps/rejected": -4.1933064460754395, "loss": 0.6193, "nll_loss": 0.586412787437439, "rewards/accuracies": 1.0, "rewards/chosen": -0.18726293742656708, "rewards/margins": 0.23206770420074463, "rewards/rejected": -0.4193306565284729, "step": 5031 }, { "epoch": 13.776865160848734, "grad_norm": 4.610504150390625, "learning_rate": 3.1068493150684934e-07, "log_odds_chosen": 3.111057758331299, "log_odds_ratio": -0.14794594049453735, "logits/chosen": 1.3389370441436768, "logits/rejected": 1.3806103467941284, "logps/chosen": -2.4074742794036865, "logps/rejected": -5.375979423522949, "loss": 0.5337, "nll_loss": 0.5189103484153748, "rewards/accuracies": 1.0, "rewards/chosen": -0.24074742197990417, "rewards/margins": 0.2968505024909973, "rewards/rejected": -0.5375978946685791, "step": 5032 }, { "epoch": 13.779603011635865, "grad_norm": 3.677581787109375, "learning_rate": 3.1054794520547944e-07, "log_odds_chosen": 3.713487148284912, "log_odds_ratio": -0.20410263538360596, "logits/chosen": 0.9561196565628052, "logits/rejected": 0.9869564175605774, "logps/chosen": -2.0570592880249023, "logps/rejected": -5.611769676208496, "loss": 0.5888, "nll_loss": 0.5683411955833435, "rewards/accuracies": 1.0, "rewards/chosen": -0.2057059407234192, "rewards/margins": 0.35547101497650146, "rewards/rejected": -0.5611769556999207, "step": 5033 }, { "epoch": 13.782340862422998, "grad_norm": 4.069000244140625, "learning_rate": 3.1041095890410954e-07, "log_odds_chosen": 2.449437379837036, "log_odds_ratio": -0.15799333155155182, "logits/chosen": 0.9187828302383423, "logits/rejected": 0.8814787864685059, "logps/chosen": -2.249289035797119, "logps/rejected": -4.491951942443848, "loss": 0.5799, "nll_loss": 0.5640542507171631, "rewards/accuracies": 1.0, "rewards/chosen": -0.22492891550064087, "rewards/margins": 0.22426627576351166, "rewards/rejected": -0.4491952359676361, "step": 5034 }, { "epoch": 13.78507871321013, "grad_norm": 3.2494282722473145, "learning_rate": 3.102739726027397e-07, "log_odds_chosen": 2.289766311645508, "log_odds_ratio": -0.1458217054605484, "logits/chosen": 0.7710279226303101, "logits/rejected": 0.7386120557785034, "logps/chosen": -1.106872797012329, "logps/rejected": -2.945680618286133, "loss": 0.4337, "nll_loss": 0.4191514551639557, "rewards/accuracies": 1.0, "rewards/chosen": -0.11068728566169739, "rewards/margins": 0.1838807910680771, "rewards/rejected": -0.2945680618286133, "step": 5035 }, { "epoch": 13.787816563997263, "grad_norm": 6.866116523742676, "learning_rate": 3.1013698630136984e-07, "log_odds_chosen": 1.4933135509490967, "log_odds_ratio": -0.7085539698600769, "logits/chosen": 1.0070291757583618, "logits/rejected": 1.0654802322387695, "logps/chosen": -2.3698055744171143, "logps/rejected": -3.7684946060180664, "loss": 0.7394, "nll_loss": 0.6685681343078613, "rewards/accuracies": 0.875, "rewards/chosen": -0.23698055744171143, "rewards/margins": 0.13986888527870178, "rewards/rejected": -0.3768494725227356, "step": 5036 }, { "epoch": 13.790554414784394, "grad_norm": 3.4400792121887207, "learning_rate": 3.1e-07, "log_odds_chosen": 3.6569442749023438, "log_odds_ratio": -0.13355420529842377, "logits/chosen": 0.8544690608978271, "logits/rejected": 0.9067671298980713, "logps/chosen": -2.050527811050415, "logps/rejected": -5.517329216003418, "loss": 0.5804, "nll_loss": 0.5670469999313354, "rewards/accuracies": 1.0, "rewards/chosen": -0.20505279302597046, "rewards/margins": 0.3466801047325134, "rewards/rejected": -0.5517328977584839, "step": 5037 }, { "epoch": 13.793292265571527, "grad_norm": 3.7637898921966553, "learning_rate": 3.098630136986301e-07, "log_odds_chosen": 3.488673448562622, "log_odds_ratio": -0.19603992998600006, "logits/chosen": 1.1757272481918335, "logits/rejected": 1.232391357421875, "logps/chosen": -2.475151538848877, "logps/rejected": -5.863155364990234, "loss": 0.6398, "nll_loss": 0.6201733946800232, "rewards/accuracies": 1.0, "rewards/chosen": -0.24751515686511993, "rewards/margins": 0.3388003706932068, "rewards/rejected": -0.5863155126571655, "step": 5038 }, { "epoch": 13.796030116358658, "grad_norm": 3.7284839153289795, "learning_rate": 3.097260273972603e-07, "log_odds_chosen": 1.9528303146362305, "log_odds_ratio": -0.2545158863067627, "logits/chosen": 1.0415180921554565, "logits/rejected": 1.0497405529022217, "logps/chosen": -1.743204116821289, "logps/rejected": -3.545281410217285, "loss": 0.5063, "nll_loss": 0.48083561658859253, "rewards/accuracies": 1.0, "rewards/chosen": -0.17432041466236115, "rewards/margins": 0.1802077293395996, "rewards/rejected": -0.35452815890312195, "step": 5039 }, { "epoch": 13.79876796714579, "grad_norm": 4.060753345489502, "learning_rate": 3.095890410958904e-07, "log_odds_chosen": 2.057525634765625, "log_odds_ratio": -0.23501688241958618, "logits/chosen": 1.273768424987793, "logits/rejected": 1.3398890495300293, "logps/chosen": -1.7913930416107178, "logps/rejected": -3.6858201026916504, "loss": 0.4601, "nll_loss": 0.43661636114120483, "rewards/accuracies": 0.875, "rewards/chosen": -0.17913931608200073, "rewards/margins": 0.1894426941871643, "rewards/rejected": -0.36858201026916504, "step": 5040 }, { "epoch": 13.801505817932922, "grad_norm": 7.2606425285339355, "learning_rate": 3.094520547945205e-07, "log_odds_chosen": 0.6758370995521545, "log_odds_ratio": -0.6031224727630615, "logits/chosen": 1.3145151138305664, "logits/rejected": 1.2993499040603638, "logps/chosen": -2.6241660118103027, "logps/rejected": -3.2221975326538086, "loss": 0.733, "nll_loss": 0.6726909875869751, "rewards/accuracies": 0.75, "rewards/chosen": -0.2624166011810303, "rewards/margins": 0.05980316922068596, "rewards/rejected": -0.32221975922584534, "step": 5041 }, { "epoch": 13.804243668720055, "grad_norm": 3.4278199672698975, "learning_rate": 3.0931506849315065e-07, "log_odds_chosen": 2.6138243675231934, "log_odds_ratio": -0.15980401635169983, "logits/chosen": 1.2607972621917725, "logits/rejected": 1.2997112274169922, "logps/chosen": -2.0026750564575195, "logps/rejected": -4.442356586456299, "loss": 0.4848, "nll_loss": 0.46881958842277527, "rewards/accuracies": 1.0, "rewards/chosen": -0.20026752352714539, "rewards/margins": 0.24396812915802002, "rewards/rejected": -0.444235622882843, "step": 5042 }, { "epoch": 13.806981519507186, "grad_norm": 7.071896553039551, "learning_rate": 3.091780821917808e-07, "log_odds_chosen": 1.8545273542404175, "log_odds_ratio": -0.2569124102592468, "logits/chosen": 1.2257075309753418, "logits/rejected": 1.263204574584961, "logps/chosen": -2.3030550479888916, "logps/rejected": -4.04103946685791, "loss": 0.661, "nll_loss": 0.6353380680084229, "rewards/accuracies": 0.875, "rewards/chosen": -0.2303054928779602, "rewards/margins": 0.17379844188690186, "rewards/rejected": -0.40410393476486206, "step": 5043 }, { "epoch": 13.809719370294319, "grad_norm": 3.575862407684326, "learning_rate": 3.0904109589041096e-07, "log_odds_chosen": 2.7015607357025146, "log_odds_ratio": -0.17065683007240295, "logits/chosen": 1.2271099090576172, "logits/rejected": 1.1985654830932617, "logps/chosen": -1.7401924133300781, "logps/rejected": -4.225672721862793, "loss": 0.5164, "nll_loss": 0.49936816096305847, "rewards/accuracies": 1.0, "rewards/chosen": -0.1740192472934723, "rewards/margins": 0.24854803085327148, "rewards/rejected": -0.4225672781467438, "step": 5044 }, { "epoch": 13.81245722108145, "grad_norm": 7.098913192749023, "learning_rate": 3.0890410958904105e-07, "log_odds_chosen": 2.471348524093628, "log_odds_ratio": -0.2027963250875473, "logits/chosen": 0.8593596816062927, "logits/rejected": 0.8227663040161133, "logps/chosen": -2.6561782360076904, "logps/rejected": -5.0160627365112305, "loss": 0.5976, "nll_loss": 0.5773584842681885, "rewards/accuracies": 1.0, "rewards/chosen": -0.26561781764030457, "rewards/margins": 0.23598846793174744, "rewards/rejected": -0.501606285572052, "step": 5045 }, { "epoch": 13.815195071868583, "grad_norm": 4.0576395988464355, "learning_rate": 3.0876712328767126e-07, "log_odds_chosen": 2.996086597442627, "log_odds_ratio": -0.14829006791114807, "logits/chosen": 0.879875123500824, "logits/rejected": 0.8464370965957642, "logps/chosen": -2.3614914417266846, "logps/rejected": -5.20322847366333, "loss": 0.7059, "nll_loss": 0.6910350322723389, "rewards/accuracies": 1.0, "rewards/chosen": -0.2361491322517395, "rewards/margins": 0.28417375683784485, "rewards/rejected": -0.5203229188919067, "step": 5046 }, { "epoch": 13.817932922655714, "grad_norm": 8.397320747375488, "learning_rate": 3.0863013698630136e-07, "log_odds_chosen": 2.722942590713501, "log_odds_ratio": -0.4997886121273041, "logits/chosen": 1.2622555494308472, "logits/rejected": 1.1901466846466064, "logps/chosen": -2.450303316116333, "logps/rejected": -5.005120277404785, "loss": 0.54, "nll_loss": 0.4900478422641754, "rewards/accuracies": 0.875, "rewards/chosen": -0.24503034353256226, "rewards/margins": 0.25548169016838074, "rewards/rejected": -0.5005120038986206, "step": 5047 }, { "epoch": 13.820670773442847, "grad_norm": 3.8825387954711914, "learning_rate": 3.0849315068493146e-07, "log_odds_chosen": 3.190380811691284, "log_odds_ratio": -0.1590036153793335, "logits/chosen": 0.7905056476593018, "logits/rejected": 0.7446361780166626, "logps/chosen": -2.001913070678711, "logps/rejected": -4.897077560424805, "loss": 0.4899, "nll_loss": 0.4740478992462158, "rewards/accuracies": 1.0, "rewards/chosen": -0.20019130408763885, "rewards/margins": 0.2895164489746094, "rewards/rejected": -0.48970770835876465, "step": 5048 }, { "epoch": 13.82340862422998, "grad_norm": 3.1973819732666016, "learning_rate": 3.0835616438356166e-07, "log_odds_chosen": 3.9438414573669434, "log_odds_ratio": -0.1230674535036087, "logits/chosen": 1.083695650100708, "logits/rejected": 1.05106520652771, "logps/chosen": -2.0404152870178223, "logps/rejected": -5.84377384185791, "loss": 0.5778, "nll_loss": 0.565532922744751, "rewards/accuracies": 1.0, "rewards/chosen": -0.20404154062271118, "rewards/margins": 0.38033589720726013, "rewards/rejected": -0.5843774676322937, "step": 5049 }, { "epoch": 13.826146475017111, "grad_norm": 18.08988380432129, "learning_rate": 3.0821917808219176e-07, "log_odds_chosen": -0.11392691731452942, "log_odds_ratio": -0.9361323118209839, "logits/chosen": 0.8735790252685547, "logits/rejected": 0.8340593576431274, "logps/chosen": -2.834230899810791, "logps/rejected": -2.694978952407837, "loss": 0.6067, "nll_loss": 0.5131310224533081, "rewards/accuracies": 0.5, "rewards/chosen": -0.2834230959415436, "rewards/margins": -0.013925189152359962, "rewards/rejected": -0.26949790120124817, "step": 5050 }, { "epoch": 13.828884325804244, "grad_norm": 3.5238394737243652, "learning_rate": 3.080821917808219e-07, "log_odds_chosen": 3.0674405097961426, "log_odds_ratio": -0.1784280240535736, "logits/chosen": 1.1302696466445923, "logits/rejected": 1.039864182472229, "logps/chosen": -1.4814021587371826, "logps/rejected": -4.303778171539307, "loss": 0.507, "nll_loss": 0.48916566371917725, "rewards/accuracies": 1.0, "rewards/chosen": -0.14814022183418274, "rewards/margins": 0.28223761916160583, "rewards/rejected": -0.4303777813911438, "step": 5051 }, { "epoch": 13.831622176591376, "grad_norm": 3.9471044540405273, "learning_rate": 3.07945205479452e-07, "log_odds_chosen": 3.1803743839263916, "log_odds_ratio": -0.1633758544921875, "logits/chosen": 1.063758134841919, "logits/rejected": 1.0617009401321411, "logps/chosen": -1.7207272052764893, "logps/rejected": -4.713454723358154, "loss": 0.5412, "nll_loss": 0.5248523354530334, "rewards/accuracies": 1.0, "rewards/chosen": -0.17207270860671997, "rewards/margins": 0.2992727756500244, "rewards/rejected": -0.4713454842567444, "step": 5052 }, { "epoch": 13.834360027378509, "grad_norm": 6.837131023406982, "learning_rate": 3.078082191780822e-07, "log_odds_chosen": 2.559875726699829, "log_odds_ratio": -0.36940085887908936, "logits/chosen": 0.9168846607208252, "logits/rejected": 0.8549262285232544, "logps/chosen": -2.5345003604888916, "logps/rejected": -4.98738956451416, "loss": 0.6737, "nll_loss": 0.636717677116394, "rewards/accuracies": 0.75, "rewards/chosen": -0.25345003604888916, "rewards/margins": 0.24528896808624268, "rewards/rejected": -0.49873900413513184, "step": 5053 }, { "epoch": 13.83709787816564, "grad_norm": 3.3504390716552734, "learning_rate": 3.076712328767123e-07, "log_odds_chosen": 5.367537498474121, "log_odds_ratio": -0.09433357417583466, "logits/chosen": 1.0535491704940796, "logits/rejected": 1.0376662015914917, "logps/chosen": -2.214804172515869, "logps/rejected": -7.432294845581055, "loss": 0.5651, "nll_loss": 0.55565345287323, "rewards/accuracies": 1.0, "rewards/chosen": -0.22148041427135468, "rewards/margins": 0.5217490792274475, "rewards/rejected": -0.7432295083999634, "step": 5054 }, { "epoch": 13.839835728952773, "grad_norm": 4.558873176574707, "learning_rate": 3.075342465753424e-07, "log_odds_chosen": 3.3416495323181152, "log_odds_ratio": -0.24140600860118866, "logits/chosen": 1.2748045921325684, "logits/rejected": 1.3295483589172363, "logps/chosen": -2.3246347904205322, "logps/rejected": -5.561514854431152, "loss": 0.6173, "nll_loss": 0.5931494235992432, "rewards/accuracies": 0.875, "rewards/chosen": -0.23246349394321442, "rewards/margins": 0.32368800044059753, "rewards/rejected": -0.5561515092849731, "step": 5055 }, { "epoch": 13.842573579739904, "grad_norm": 3.1414499282836914, "learning_rate": 3.073972602739726e-07, "log_odds_chosen": 3.09805965423584, "log_odds_ratio": -0.13244393467903137, "logits/chosen": 1.4801974296569824, "logits/rejected": 1.5225932598114014, "logps/chosen": -1.7215510606765747, "logps/rejected": -4.628957748413086, "loss": 0.5135, "nll_loss": 0.5002780556678772, "rewards/accuracies": 1.0, "rewards/chosen": -0.17215511202812195, "rewards/margins": 0.2907406687736511, "rewards/rejected": -0.46289581060409546, "step": 5056 }, { "epoch": 13.845311430527037, "grad_norm": 3.529340982437134, "learning_rate": 3.072602739726027e-07, "log_odds_chosen": 3.1759517192840576, "log_odds_ratio": -0.2152109444141388, "logits/chosen": 0.9023548364639282, "logits/rejected": 0.8992606401443481, "logps/chosen": -1.7556511163711548, "logps/rejected": -4.782826900482178, "loss": 0.5587, "nll_loss": 0.5371679663658142, "rewards/accuracies": 0.875, "rewards/chosen": -0.17556512355804443, "rewards/margins": 0.3027175962924957, "rewards/rejected": -0.47828271985054016, "step": 5057 }, { "epoch": 13.848049281314168, "grad_norm": 4.001431465148926, "learning_rate": 3.0712328767123287e-07, "log_odds_chosen": 4.302511215209961, "log_odds_ratio": -0.23732545971870422, "logits/chosen": 1.1746083498001099, "logits/rejected": 1.2057654857635498, "logps/chosen": -2.3805341720581055, "logps/rejected": -6.580328941345215, "loss": 0.7251, "nll_loss": 0.7013323307037354, "rewards/accuracies": 0.875, "rewards/chosen": -0.23805344104766846, "rewards/margins": 0.4199795126914978, "rewards/rejected": -0.6580329537391663, "step": 5058 }, { "epoch": 13.850787132101301, "grad_norm": 5.443025588989258, "learning_rate": 3.0698630136986297e-07, "log_odds_chosen": 4.208261489868164, "log_odds_ratio": -0.13944846391677856, "logits/chosen": 1.2915558815002441, "logits/rejected": 1.2490999698638916, "logps/chosen": -2.669013500213623, "logps/rejected": -6.767359733581543, "loss": 0.7527, "nll_loss": 0.7387570142745972, "rewards/accuracies": 1.0, "rewards/chosen": -0.2669013440608978, "rewards/margins": 0.40983468294143677, "rewards/rejected": -0.6767359972000122, "step": 5059 }, { "epoch": 13.853524982888432, "grad_norm": 6.521500110626221, "learning_rate": 3.068493150684932e-07, "log_odds_chosen": 2.9992964267730713, "log_odds_ratio": -0.2080843150615692, "logits/chosen": 1.1216927766799927, "logits/rejected": 1.1184229850769043, "logps/chosen": -2.3289549350738525, "logps/rejected": -5.226170063018799, "loss": 0.6111, "nll_loss": 0.5902925729751587, "rewards/accuracies": 1.0, "rewards/chosen": -0.23289549350738525, "rewards/margins": 0.2897214889526367, "rewards/rejected": -0.522616982460022, "step": 5060 }, { "epoch": 13.856262833675565, "grad_norm": 3.6700963973999023, "learning_rate": 3.067123287671233e-07, "log_odds_chosen": 2.7064781188964844, "log_odds_ratio": -0.22750848531723022, "logits/chosen": 1.0500423908233643, "logits/rejected": 1.06356942653656, "logps/chosen": -1.9117836952209473, "logps/rejected": -4.4454240798950195, "loss": 0.677, "nll_loss": 0.654258131980896, "rewards/accuracies": 0.875, "rewards/chosen": -0.1911783665418625, "rewards/margins": 0.25336402654647827, "rewards/rejected": -0.44454240798950195, "step": 5061 }, { "epoch": 13.859000684462696, "grad_norm": 4.6833038330078125, "learning_rate": 3.065753424657534e-07, "log_odds_chosen": 1.9787929058074951, "log_odds_ratio": -0.26449865102767944, "logits/chosen": 1.1437174081802368, "logits/rejected": 1.1042289733886719, "logps/chosen": -2.0209765434265137, "logps/rejected": -3.8196895122528076, "loss": 0.5695, "nll_loss": 0.5430867671966553, "rewards/accuracies": 0.875, "rewards/chosen": -0.20209762454032898, "rewards/margins": 0.1798713058233261, "rewards/rejected": -0.3819689452648163, "step": 5062 }, { "epoch": 13.86173853524983, "grad_norm": 3.9473419189453125, "learning_rate": 3.064383561643836e-07, "log_odds_chosen": 2.768603801727295, "log_odds_ratio": -0.16129589080810547, "logits/chosen": 1.0533812046051025, "logits/rejected": 1.0949249267578125, "logps/chosen": -1.5009838342666626, "logps/rejected": -4.008118152618408, "loss": 0.5152, "nll_loss": 0.49909988045692444, "rewards/accuracies": 1.0, "rewards/chosen": -0.15009836852550507, "rewards/margins": 0.25071343779563904, "rewards/rejected": -0.4008118212223053, "step": 5063 }, { "epoch": 13.86447638603696, "grad_norm": 3.9666504859924316, "learning_rate": 3.063013698630137e-07, "log_odds_chosen": 2.5464372634887695, "log_odds_ratio": -0.21069873869419098, "logits/chosen": 0.9758834838867188, "logits/rejected": 0.888293981552124, "logps/chosen": -2.1048717498779297, "logps/rejected": -4.4989013671875, "loss": 0.4763, "nll_loss": 0.4552067518234253, "rewards/accuracies": 1.0, "rewards/chosen": -0.21048718690872192, "rewards/margins": 0.23940296471118927, "rewards/rejected": -0.44989013671875, "step": 5064 }, { "epoch": 13.867214236824093, "grad_norm": 3.8249759674072266, "learning_rate": 3.0616438356164383e-07, "log_odds_chosen": 0.9261432886123657, "log_odds_ratio": -0.42067092657089233, "logits/chosen": 1.1499149799346924, "logits/rejected": 1.101340413093567, "logps/chosen": -1.5912158489227295, "logps/rejected": -2.418928623199463, "loss": 0.4662, "nll_loss": 0.42415308952331543, "rewards/accuracies": 0.875, "rewards/chosen": -0.159121572971344, "rewards/margins": 0.08277128636837006, "rewards/rejected": -0.24189287424087524, "step": 5065 }, { "epoch": 13.869952087611225, "grad_norm": 3.2281858921051025, "learning_rate": 3.0602739726027393e-07, "log_odds_chosen": 2.143739700317383, "log_odds_ratio": -0.25376033782958984, "logits/chosen": 1.3819868564605713, "logits/rejected": 1.3763768672943115, "logps/chosen": -1.3595916032791138, "logps/rejected": -3.271721839904785, "loss": 0.4532, "nll_loss": 0.4278142750263214, "rewards/accuracies": 1.0, "rewards/chosen": -0.13595916330814362, "rewards/margins": 0.191212996840477, "rewards/rejected": -0.327172189950943, "step": 5066 }, { "epoch": 13.872689938398358, "grad_norm": 6.859872817993164, "learning_rate": 3.0589041095890414e-07, "log_odds_chosen": 1.294023036956787, "log_odds_ratio": -0.6050320863723755, "logits/chosen": 0.8261406421661377, "logits/rejected": 0.844200611114502, "logps/chosen": -2.2202258110046387, "logps/rejected": -3.358454704284668, "loss": 0.6706, "nll_loss": 0.6101317405700684, "rewards/accuracies": 0.875, "rewards/chosen": -0.22202256321907043, "rewards/margins": 0.11382289230823517, "rewards/rejected": -0.3358454704284668, "step": 5067 }, { "epoch": 13.875427789185489, "grad_norm": 3.5199451446533203, "learning_rate": 3.0575342465753423e-07, "log_odds_chosen": 3.0762882232666016, "log_odds_ratio": -0.09651437401771545, "logits/chosen": 0.9494691491127014, "logits/rejected": 0.9332813620567322, "logps/chosen": -1.7384448051452637, "logps/rejected": -4.49257755279541, "loss": 0.5455, "nll_loss": 0.535887598991394, "rewards/accuracies": 1.0, "rewards/chosen": -0.17384448647499084, "rewards/margins": 0.27541327476501465, "rewards/rejected": -0.4492577910423279, "step": 5068 }, { "epoch": 13.878165639972622, "grad_norm": 4.25496244430542, "learning_rate": 3.0561643835616433e-07, "log_odds_chosen": 3.641172409057617, "log_odds_ratio": -0.208307147026062, "logits/chosen": 1.2167110443115234, "logits/rejected": 1.2753236293792725, "logps/chosen": -3.233938217163086, "logps/rejected": -6.623844146728516, "loss": 0.7837, "nll_loss": 0.7628657817840576, "rewards/accuracies": 1.0, "rewards/chosen": -0.3233938217163086, "rewards/margins": 0.33899062871932983, "rewards/rejected": -0.6623844504356384, "step": 5069 }, { "epoch": 13.880903490759753, "grad_norm": 3.454092025756836, "learning_rate": 3.0547945205479454e-07, "log_odds_chosen": 2.4762091636657715, "log_odds_ratio": -0.26845288276672363, "logits/chosen": 0.7783482670783997, "logits/rejected": 0.7921602725982666, "logps/chosen": -1.733589768409729, "logps/rejected": -3.983389377593994, "loss": 0.5887, "nll_loss": 0.5618829727172852, "rewards/accuracies": 0.875, "rewards/chosen": -0.1733589768409729, "rewards/margins": 0.2249799370765686, "rewards/rejected": -0.3983389437198639, "step": 5070 }, { "epoch": 13.883641341546886, "grad_norm": 3.386793375015259, "learning_rate": 3.0534246575342464e-07, "log_odds_chosen": 2.323967456817627, "log_odds_ratio": -0.14965927600860596, "logits/chosen": 1.2144163846969604, "logits/rejected": 1.1160697937011719, "logps/chosen": -1.5942106246948242, "logps/rejected": -3.673830509185791, "loss": 0.4964, "nll_loss": 0.4814271330833435, "rewards/accuracies": 1.0, "rewards/chosen": -0.15942108631134033, "rewards/margins": 0.20796197652816772, "rewards/rejected": -0.36738306283950806, "step": 5071 }, { "epoch": 13.886379192334019, "grad_norm": 9.27576732635498, "learning_rate": 3.052054794520548e-07, "log_odds_chosen": 3.665050983428955, "log_odds_ratio": -0.3042571246623993, "logits/chosen": 1.246059775352478, "logits/rejected": 1.2243722677230835, "logps/chosen": -2.0431129932403564, "logps/rejected": -5.437136173248291, "loss": 0.5147, "nll_loss": 0.4842400550842285, "rewards/accuracies": 0.875, "rewards/chosen": -0.2043113112449646, "rewards/margins": 0.33940231800079346, "rewards/rejected": -0.5437136292457581, "step": 5072 }, { "epoch": 13.88911704312115, "grad_norm": 5.4443488121032715, "learning_rate": 3.050684931506849e-07, "log_odds_chosen": 2.5225932598114014, "log_odds_ratio": -0.2951001822948456, "logits/chosen": 1.2603868246078491, "logits/rejected": 1.2705812454223633, "logps/chosen": -2.079814910888672, "logps/rejected": -4.444766044616699, "loss": 0.5535, "nll_loss": 0.5239922404289246, "rewards/accuracies": 0.875, "rewards/chosen": -0.20798151195049286, "rewards/margins": 0.23649509251117706, "rewards/rejected": -0.4444766044616699, "step": 5073 }, { "epoch": 13.891854893908281, "grad_norm": 3.9623863697052, "learning_rate": 3.049315068493151e-07, "log_odds_chosen": 2.627106189727783, "log_odds_ratio": -0.30838796496391296, "logits/chosen": 1.1081156730651855, "logits/rejected": 1.105594515800476, "logps/chosen": -2.0144639015197754, "logps/rejected": -4.468140602111816, "loss": 0.5394, "nll_loss": 0.5085983276367188, "rewards/accuracies": 0.875, "rewards/chosen": -0.20144638419151306, "rewards/margins": 0.24536770582199097, "rewards/rejected": -0.44681406021118164, "step": 5074 }, { "epoch": 13.894592744695414, "grad_norm": 3.313626289367676, "learning_rate": 3.047945205479452e-07, "log_odds_chosen": 2.576746940612793, "log_odds_ratio": -0.23391839861869812, "logits/chosen": 1.0883049964904785, "logits/rejected": 1.127199649810791, "logps/chosen": -2.1956114768981934, "logps/rejected": -4.636439323425293, "loss": 0.5581, "nll_loss": 0.5346981883049011, "rewards/accuracies": 1.0, "rewards/chosen": -0.2195611447095871, "rewards/margins": 0.24408279359340668, "rewards/rejected": -0.46364396810531616, "step": 5075 }, { "epoch": 13.897330595482547, "grad_norm": 4.254727363586426, "learning_rate": 3.046575342465753e-07, "log_odds_chosen": 3.0374021530151367, "log_odds_ratio": -0.22444583475589752, "logits/chosen": 1.1093156337738037, "logits/rejected": 1.1104069948196411, "logps/chosen": -1.393086552619934, "logps/rejected": -4.224706172943115, "loss": 0.531, "nll_loss": 0.5085136294364929, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393086463212967, "rewards/margins": 0.2831619679927826, "rewards/rejected": -0.4224705994129181, "step": 5076 }, { "epoch": 13.900068446269678, "grad_norm": 4.351066589355469, "learning_rate": 3.045205479452055e-07, "log_odds_chosen": 2.7700042724609375, "log_odds_ratio": -0.16917137801647186, "logits/chosen": 1.3741687536239624, "logits/rejected": 1.3442580699920654, "logps/chosen": -1.8460780382156372, "logps/rejected": -4.457361221313477, "loss": 0.5292, "nll_loss": 0.5123269557952881, "rewards/accuracies": 1.0, "rewards/chosen": -0.18460780382156372, "rewards/margins": 0.26112833619117737, "rewards/rejected": -0.4457361400127411, "step": 5077 }, { "epoch": 13.902806297056811, "grad_norm": 7.859649181365967, "learning_rate": 3.043835616438356e-07, "log_odds_chosen": 3.59293794631958, "log_odds_ratio": -0.3010551631450653, "logits/chosen": 1.452862024307251, "logits/rejected": 1.5069499015808105, "logps/chosen": -2.305983066558838, "logps/rejected": -5.600481033325195, "loss": 0.6425, "nll_loss": 0.6123509407043457, "rewards/accuracies": 0.875, "rewards/chosen": -0.2305983155965805, "rewards/margins": 0.3294498026371002, "rewards/rejected": -0.5600481033325195, "step": 5078 }, { "epoch": 13.905544147843942, "grad_norm": 3.7005691528320312, "learning_rate": 3.0424657534246575e-07, "log_odds_chosen": 1.6501762866973877, "log_odds_ratio": -0.25548750162124634, "logits/chosen": 1.1099185943603516, "logits/rejected": 1.0487703084945679, "logps/chosen": -1.36907160282135, "logps/rejected": -2.793834686279297, "loss": 0.4143, "nll_loss": 0.3887541592121124, "rewards/accuracies": 1.0, "rewards/chosen": -0.136907160282135, "rewards/margins": 0.14247629046440125, "rewards/rejected": -0.27938345074653625, "step": 5079 }, { "epoch": 13.908281998631075, "grad_norm": 3.866811513900757, "learning_rate": 3.041095890410959e-07, "log_odds_chosen": 2.826075792312622, "log_odds_ratio": -0.18125253915786743, "logits/chosen": 0.8360244035720825, "logits/rejected": 0.7462940812110901, "logps/chosen": -1.3432711362838745, "logps/rejected": -3.919766902923584, "loss": 0.487, "nll_loss": 0.4688926935195923, "rewards/accuracies": 1.0, "rewards/chosen": -0.13432711362838745, "rewards/margins": 0.25764957070350647, "rewards/rejected": -0.3919767141342163, "step": 5080 }, { "epoch": 13.911019849418206, "grad_norm": 4.079242706298828, "learning_rate": 3.03972602739726e-07, "log_odds_chosen": 2.420194625854492, "log_odds_ratio": -0.20283588767051697, "logits/chosen": 0.845526397228241, "logits/rejected": 0.8623830676078796, "logps/chosen": -1.9606560468673706, "logps/rejected": -4.176194190979004, "loss": 0.462, "nll_loss": 0.44175443053245544, "rewards/accuracies": 1.0, "rewards/chosen": -0.19606561958789825, "rewards/margins": 0.22155383229255676, "rewards/rejected": -0.41761940717697144, "step": 5081 }, { "epoch": 13.91375770020534, "grad_norm": 3.597987413406372, "learning_rate": 3.0383561643835615e-07, "log_odds_chosen": 2.089646100997925, "log_odds_ratio": -0.3160251975059509, "logits/chosen": 1.1936182975769043, "logits/rejected": 1.168637990951538, "logps/chosen": -1.478074312210083, "logps/rejected": -3.39993953704834, "loss": 0.4949, "nll_loss": 0.4633274972438812, "rewards/accuracies": 1.0, "rewards/chosen": -0.14780743420124054, "rewards/margins": 0.19218650460243225, "rewards/rejected": -0.339993953704834, "step": 5082 }, { "epoch": 13.91649555099247, "grad_norm": 4.188426494598389, "learning_rate": 3.0369863013698625e-07, "log_odds_chosen": 3.5007309913635254, "log_odds_ratio": -0.08255012333393097, "logits/chosen": 0.9780148863792419, "logits/rejected": 0.9256790280342102, "logps/chosen": -1.9511690139770508, "logps/rejected": -5.2448625564575195, "loss": 0.501, "nll_loss": 0.49276554584503174, "rewards/accuracies": 1.0, "rewards/chosen": -0.19511690735816956, "rewards/margins": 0.32936936616897583, "rewards/rejected": -0.5244863033294678, "step": 5083 }, { "epoch": 13.919233401779604, "grad_norm": 6.636415481567383, "learning_rate": 3.0356164383561646e-07, "log_odds_chosen": 1.5215836763381958, "log_odds_ratio": -0.3122304081916809, "logits/chosen": 0.8455315828323364, "logits/rejected": 0.8179126381874084, "logps/chosen": -2.1392951011657715, "logps/rejected": -3.550356864929199, "loss": 0.5696, "nll_loss": 0.5384023785591125, "rewards/accuracies": 1.0, "rewards/chosen": -0.21392951905727386, "rewards/margins": 0.14110617339611053, "rewards/rejected": -0.3550356924533844, "step": 5084 }, { "epoch": 13.921971252566735, "grad_norm": 8.145554542541504, "learning_rate": 3.0342465753424656e-07, "log_odds_chosen": 2.6438961029052734, "log_odds_ratio": -0.4936300814151764, "logits/chosen": 1.0997984409332275, "logits/rejected": 1.111917495727539, "logps/chosen": -1.8571949005126953, "logps/rejected": -4.243015766143799, "loss": 0.523, "nll_loss": 0.4736441969871521, "rewards/accuracies": 0.875, "rewards/chosen": -0.18571949005126953, "rewards/margins": 0.2385820746421814, "rewards/rejected": -0.4243015646934509, "step": 5085 }, { "epoch": 13.924709103353868, "grad_norm": 7.707729339599609, "learning_rate": 3.032876712328767e-07, "log_odds_chosen": 3.3363213539123535, "log_odds_ratio": -0.5531138181686401, "logits/chosen": 1.2874271869659424, "logits/rejected": 1.3229482173919678, "logps/chosen": -2.614112377166748, "logps/rejected": -5.7530293464660645, "loss": 0.685, "nll_loss": 0.6297135353088379, "rewards/accuracies": 0.875, "rewards/chosen": -0.26141124963760376, "rewards/margins": 0.3138917088508606, "rewards/rejected": -0.5753029584884644, "step": 5086 }, { "epoch": 13.927446954140999, "grad_norm": 4.726995944976807, "learning_rate": 3.0315068493150686e-07, "log_odds_chosen": 2.0510265827178955, "log_odds_ratio": -0.29003795981407166, "logits/chosen": 1.204552173614502, "logits/rejected": 1.2256107330322266, "logps/chosen": -2.36527681350708, "logps/rejected": -4.296072006225586, "loss": 0.5751, "nll_loss": 0.5460563898086548, "rewards/accuracies": 0.875, "rewards/chosen": -0.236527681350708, "rewards/margins": 0.19307953119277954, "rewards/rejected": -0.42960721254348755, "step": 5087 }, { "epoch": 13.930184804928132, "grad_norm": 4.565207004547119, "learning_rate": 3.0301369863013696e-07, "log_odds_chosen": 2.059049606323242, "log_odds_ratio": -0.4458240866661072, "logits/chosen": 1.103242039680481, "logits/rejected": 1.0364222526550293, "logps/chosen": -2.0122735500335693, "logps/rejected": -3.9607036113739014, "loss": 0.6401, "nll_loss": 0.595556914806366, "rewards/accuracies": 0.75, "rewards/chosen": -0.2012273520231247, "rewards/margins": 0.19484300911426544, "rewards/rejected": -0.3960703909397125, "step": 5088 }, { "epoch": 13.932922655715263, "grad_norm": 5.7206268310546875, "learning_rate": 3.028767123287671e-07, "log_odds_chosen": 2.1043505668640137, "log_odds_ratio": -0.3943813443183899, "logits/chosen": 1.3472838401794434, "logits/rejected": 1.3084099292755127, "logps/chosen": -1.773136854171753, "logps/rejected": -3.62166166305542, "loss": 0.5112, "nll_loss": 0.4717698395252228, "rewards/accuracies": 0.875, "rewards/chosen": -0.1773136854171753, "rewards/margins": 0.1848524808883667, "rewards/rejected": -0.362166166305542, "step": 5089 }, { "epoch": 13.935660506502396, "grad_norm": 3.218766927719116, "learning_rate": 3.027397260273972e-07, "log_odds_chosen": 2.1274099349975586, "log_odds_ratio": -0.1763749122619629, "logits/chosen": 1.077090859413147, "logits/rejected": 1.0240131616592407, "logps/chosen": -1.1991386413574219, "logps/rejected": -3.0073304176330566, "loss": 0.4175, "nll_loss": 0.39987945556640625, "rewards/accuracies": 1.0, "rewards/chosen": -0.11991386115550995, "rewards/margins": 0.18081916868686676, "rewards/rejected": -0.3007330298423767, "step": 5090 }, { "epoch": 13.938398357289527, "grad_norm": 4.155008316040039, "learning_rate": 3.026027397260274e-07, "log_odds_chosen": 3.8461666107177734, "log_odds_ratio": -0.16736122965812683, "logits/chosen": 1.0881837606430054, "logits/rejected": 0.9654498100280762, "logps/chosen": -1.74251127243042, "logps/rejected": -5.4191179275512695, "loss": 0.5505, "nll_loss": 0.533731997013092, "rewards/accuracies": 1.0, "rewards/chosen": -0.17425113916397095, "rewards/margins": 0.36766064167022705, "rewards/rejected": -0.5419118404388428, "step": 5091 }, { "epoch": 13.94113620807666, "grad_norm": 3.5645627975463867, "learning_rate": 3.024657534246575e-07, "log_odds_chosen": 2.3834030628204346, "log_odds_ratio": -0.1633443832397461, "logits/chosen": 1.2333236932754517, "logits/rejected": 1.214093804359436, "logps/chosen": -2.081446647644043, "logps/rejected": -4.289785861968994, "loss": 0.5177, "nll_loss": 0.5013169646263123, "rewards/accuracies": 1.0, "rewards/chosen": -0.2081446796655655, "rewards/margins": 0.220833882689476, "rewards/rejected": -0.4289785623550415, "step": 5092 }, { "epoch": 13.943874058863791, "grad_norm": 4.176219940185547, "learning_rate": 3.0232876712328767e-07, "log_odds_chosen": 2.24709415435791, "log_odds_ratio": -0.30206626653671265, "logits/chosen": 0.8915524482727051, "logits/rejected": 0.8887227177619934, "logps/chosen": -1.9638742208480835, "logps/rejected": -4.056173801422119, "loss": 0.5692, "nll_loss": 0.5389462113380432, "rewards/accuracies": 0.875, "rewards/chosen": -0.19638743996620178, "rewards/margins": 0.2092299461364746, "rewards/rejected": -0.4056173861026764, "step": 5093 }, { "epoch": 13.946611909650924, "grad_norm": 6.064374923706055, "learning_rate": 3.021917808219178e-07, "log_odds_chosen": 1.2192083597183228, "log_odds_ratio": -0.6068131327629089, "logits/chosen": 0.7170493602752686, "logits/rejected": 0.6636955738067627, "logps/chosen": -2.308093309402466, "logps/rejected": -3.35957932472229, "loss": 0.5735, "nll_loss": 0.5128169655799866, "rewards/accuracies": 0.75, "rewards/chosen": -0.23080933094024658, "rewards/margins": 0.10514859855175018, "rewards/rejected": -0.33595791459083557, "step": 5094 }, { "epoch": 13.949349760438055, "grad_norm": 3.567108154296875, "learning_rate": 3.020547945205479e-07, "log_odds_chosen": 2.586259365081787, "log_odds_ratio": -0.33986273407936096, "logits/chosen": 0.972067654132843, "logits/rejected": 0.9565002918243408, "logps/chosen": -2.067922592163086, "logps/rejected": -4.577053070068359, "loss": 0.506, "nll_loss": 0.47198066115379333, "rewards/accuracies": 0.75, "rewards/chosen": -0.20679225027561188, "rewards/margins": 0.25091302394866943, "rewards/rejected": -0.4577053189277649, "step": 5095 }, { "epoch": 13.952087611225188, "grad_norm": 3.712909460067749, "learning_rate": 3.0191780821917807e-07, "log_odds_chosen": 2.347083330154419, "log_odds_ratio": -0.25457140803337097, "logits/chosen": 1.1622941493988037, "logits/rejected": 1.2004680633544922, "logps/chosen": -2.0104987621307373, "logps/rejected": -4.133411884307861, "loss": 0.5516, "nll_loss": 0.5261136889457703, "rewards/accuracies": 0.875, "rewards/chosen": -0.20104986429214478, "rewards/margins": 0.21229133009910583, "rewards/rejected": -0.4133411943912506, "step": 5096 }, { "epoch": 13.95482546201232, "grad_norm": 5.994004249572754, "learning_rate": 3.0178082191780817e-07, "log_odds_chosen": 2.9724411964416504, "log_odds_ratio": -0.3912297189235687, "logits/chosen": 1.1790755987167358, "logits/rejected": 1.2041126489639282, "logps/chosen": -2.6435022354125977, "logps/rejected": -5.510776996612549, "loss": 0.6031, "nll_loss": 0.564002513885498, "rewards/accuracies": 0.875, "rewards/chosen": -0.2643502354621887, "rewards/margins": 0.2867274880409241, "rewards/rejected": -0.5510777235031128, "step": 5097 }, { "epoch": 13.957563312799453, "grad_norm": 3.354534149169922, "learning_rate": 3.0164383561643837e-07, "log_odds_chosen": 4.824856758117676, "log_odds_ratio": -0.10508429259061813, "logits/chosen": 0.9418686032295227, "logits/rejected": 0.9322693347930908, "logps/chosen": -1.3587241172790527, "logps/rejected": -5.862489700317383, "loss": 0.5229, "nll_loss": 0.5123864412307739, "rewards/accuracies": 1.0, "rewards/chosen": -0.13587242364883423, "rewards/margins": 0.45037660002708435, "rewards/rejected": -0.5862489938735962, "step": 5098 }, { "epoch": 13.960301163586585, "grad_norm": 3.424072742462158, "learning_rate": 3.0150684931506847e-07, "log_odds_chosen": 2.032160520553589, "log_odds_ratio": -0.26043206453323364, "logits/chosen": 0.7286733388900757, "logits/rejected": 0.7617022395133972, "logps/chosen": -1.6662229299545288, "logps/rejected": -3.5143749713897705, "loss": 0.6551, "nll_loss": 0.6290769577026367, "rewards/accuracies": 1.0, "rewards/chosen": -0.16662229597568512, "rewards/margins": 0.1848151981830597, "rewards/rejected": -0.3514374792575836, "step": 5099 }, { "epoch": 13.963039014373717, "grad_norm": 8.446197509765625, "learning_rate": 3.013698630136986e-07, "log_odds_chosen": 2.7907581329345703, "log_odds_ratio": -0.4189077913761139, "logits/chosen": 0.8071415424346924, "logits/rejected": 0.8426636457443237, "logps/chosen": -2.48063325881958, "logps/rejected": -5.030817985534668, "loss": 0.6721, "nll_loss": 0.6301726698875427, "rewards/accuracies": 0.75, "rewards/chosen": -0.24806331098079681, "rewards/margins": 0.2550184726715088, "rewards/rejected": -0.5030817985534668, "step": 5100 }, { "epoch": 13.965776865160848, "grad_norm": 3.9013471603393555, "learning_rate": 3.012328767123288e-07, "log_odds_chosen": 2.380054473876953, "log_odds_ratio": -0.15899989008903503, "logits/chosen": 1.5138885974884033, "logits/rejected": 1.4935986995697021, "logps/chosen": -1.555777907371521, "logps/rejected": -3.6446304321289062, "loss": 0.4565, "nll_loss": 0.44060471653938293, "rewards/accuracies": 1.0, "rewards/chosen": -0.15557777881622314, "rewards/margins": 0.20888525247573853, "rewards/rejected": -0.36446303129196167, "step": 5101 }, { "epoch": 13.96851471594798, "grad_norm": 4.193491458892822, "learning_rate": 3.010958904109589e-07, "log_odds_chosen": 3.718385934829712, "log_odds_ratio": -0.18610337376594543, "logits/chosen": 0.9958763718605042, "logits/rejected": 0.9999879598617554, "logps/chosen": -1.7679407596588135, "logps/rejected": -5.291286945343018, "loss": 0.6246, "nll_loss": 0.6060377955436707, "rewards/accuracies": 1.0, "rewards/chosen": -0.17679408192634583, "rewards/margins": 0.3523346185684204, "rewards/rejected": -0.5291286706924438, "step": 5102 }, { "epoch": 13.971252566735114, "grad_norm": 3.623612403869629, "learning_rate": 3.0095890410958903e-07, "log_odds_chosen": 2.8441147804260254, "log_odds_ratio": -0.2773575484752655, "logits/chosen": 0.9704235196113586, "logits/rejected": 0.9749749302864075, "logps/chosen": -2.2404842376708984, "logps/rejected": -4.974124908447266, "loss": 0.657, "nll_loss": 0.6292312741279602, "rewards/accuracies": 0.875, "rewards/chosen": -0.2240484356880188, "rewards/margins": 0.2733640670776367, "rewards/rejected": -0.4974125027656555, "step": 5103 }, { "epoch": 13.973990417522245, "grad_norm": 4.327528476715088, "learning_rate": 3.0082191780821913e-07, "log_odds_chosen": 1.7125158309936523, "log_odds_ratio": -0.2701135575771332, "logits/chosen": 1.0359137058258057, "logits/rejected": 0.9097368121147156, "logps/chosen": -1.6274970769882202, "logps/rejected": -3.1913533210754395, "loss": 0.5966, "nll_loss": 0.5696094632148743, "rewards/accuracies": 1.0, "rewards/chosen": -0.16274970769882202, "rewards/margins": 0.1563856303691864, "rewards/rejected": -0.3191353678703308, "step": 5104 }, { "epoch": 13.976728268309378, "grad_norm": 3.7607600688934326, "learning_rate": 3.0068493150684933e-07, "log_odds_chosen": 2.55715274810791, "log_odds_ratio": -0.18330465257167816, "logits/chosen": 0.9283179044723511, "logits/rejected": 0.9251286387443542, "logps/chosen": -1.947553038597107, "logps/rejected": -4.34979772567749, "loss": 0.6242, "nll_loss": 0.6058725714683533, "rewards/accuracies": 1.0, "rewards/chosen": -0.19475530087947845, "rewards/margins": 0.2402244508266449, "rewards/rejected": -0.43497973680496216, "step": 5105 }, { "epoch": 13.979466119096509, "grad_norm": 3.651838541030884, "learning_rate": 3.0054794520547943e-07, "log_odds_chosen": 3.176365613937378, "log_odds_ratio": -0.191969633102417, "logits/chosen": 0.9505166411399841, "logits/rejected": 0.93132483959198, "logps/chosen": -1.3419328927993774, "logps/rejected": -4.2458696365356445, "loss": 0.4772, "nll_loss": 0.45797494053840637, "rewards/accuracies": 0.875, "rewards/chosen": -0.1341932862997055, "rewards/margins": 0.2903936803340912, "rewards/rejected": -0.4245869815349579, "step": 5106 }, { "epoch": 13.982203969883642, "grad_norm": 3.5994324684143066, "learning_rate": 3.004109589041096e-07, "log_odds_chosen": 4.196845054626465, "log_odds_ratio": -0.1632654070854187, "logits/chosen": 0.9855456352233887, "logits/rejected": 0.9486819505691528, "logps/chosen": -1.6421294212341309, "logps/rejected": -5.612796783447266, "loss": 0.5797, "nll_loss": 0.5633968114852905, "rewards/accuracies": 1.0, "rewards/chosen": -0.16421295702457428, "rewards/margins": 0.39706677198410034, "rewards/rejected": -0.5612797141075134, "step": 5107 }, { "epoch": 13.984941820670773, "grad_norm": 4.202059745788574, "learning_rate": 3.0027397260273974e-07, "log_odds_chosen": 1.428196907043457, "log_odds_ratio": -0.34405291080474854, "logits/chosen": 1.2290090322494507, "logits/rejected": 1.2368298768997192, "logps/chosen": -2.426561117172241, "logps/rejected": -3.752727508544922, "loss": 0.5959, "nll_loss": 0.5614830255508423, "rewards/accuracies": 0.875, "rewards/chosen": -0.24265612661838531, "rewards/margins": 0.13261662423610687, "rewards/rejected": -0.3752727508544922, "step": 5108 }, { "epoch": 13.987679671457906, "grad_norm": 3.8732521533966064, "learning_rate": 3.0013698630136983e-07, "log_odds_chosen": 1.7733851671218872, "log_odds_ratio": -0.28211337327957153, "logits/chosen": 1.1841233968734741, "logits/rejected": 1.1891449689865112, "logps/chosen": -2.358930826187134, "logps/rejected": -3.988771915435791, "loss": 0.5962, "nll_loss": 0.567986011505127, "rewards/accuracies": 0.875, "rewards/chosen": -0.23589307069778442, "rewards/margins": 0.16298411786556244, "rewards/rejected": -0.39887720346450806, "step": 5109 }, { "epoch": 13.990417522245037, "grad_norm": 3.351515293121338, "learning_rate": 3e-07, "log_odds_chosen": 3.40970516204834, "log_odds_ratio": -0.17508363723754883, "logits/chosen": 1.0699065923690796, "logits/rejected": 1.0911386013031006, "logps/chosen": -1.447985291481018, "logps/rejected": -4.48851203918457, "loss": 0.4749, "nll_loss": 0.45739585161209106, "rewards/accuracies": 1.0, "rewards/chosen": -0.14479853212833405, "rewards/margins": 0.3040526509284973, "rewards/rejected": -0.44885119795799255, "step": 5110 }, { "epoch": 13.99315537303217, "grad_norm": 4.297511100769043, "learning_rate": 2.998630136986301e-07, "log_odds_chosen": 2.7839736938476562, "log_odds_ratio": -0.2987072467803955, "logits/chosen": 1.388338565826416, "logits/rejected": 1.3611252307891846, "logps/chosen": -1.6654329299926758, "logps/rejected": -4.246159076690674, "loss": 0.4849, "nll_loss": 0.4550640285015106, "rewards/accuracies": 0.875, "rewards/chosen": -0.16654330492019653, "rewards/margins": 0.2580726444721222, "rewards/rejected": -0.42461591958999634, "step": 5111 }, { "epoch": 13.995893223819301, "grad_norm": 3.5764236450195312, "learning_rate": 2.997260273972603e-07, "log_odds_chosen": 2.404064416885376, "log_odds_ratio": -0.20936492085456848, "logits/chosen": 0.9748564958572388, "logits/rejected": 0.9619511365890503, "logps/chosen": -1.6171224117279053, "logps/rejected": -3.8534886837005615, "loss": 0.4685, "nll_loss": 0.44759365916252136, "rewards/accuracies": 1.0, "rewards/chosen": -0.16171225905418396, "rewards/margins": 0.22363662719726562, "rewards/rejected": -0.3853488862514496, "step": 5112 }, { "epoch": 13.998631074606434, "grad_norm": 6.966156482696533, "learning_rate": 2.995890410958904e-07, "log_odds_chosen": 2.171262264251709, "log_odds_ratio": -0.3471390902996063, "logits/chosen": 1.1523470878601074, "logits/rejected": 1.149471640586853, "logps/chosen": -2.3215417861938477, "logps/rejected": -4.323723316192627, "loss": 0.5667, "nll_loss": 0.5320107340812683, "rewards/accuracies": 0.875, "rewards/chosen": -0.23215419054031372, "rewards/margins": 0.20021818578243256, "rewards/rejected": -0.4323723316192627, "step": 5113 }, { "epoch": 14.001368925393566, "grad_norm": 3.2776553630828857, "learning_rate": 2.994520547945205e-07, "log_odds_chosen": 3.748462677001953, "log_odds_ratio": -0.17551006376743317, "logits/chosen": 1.0238182544708252, "logits/rejected": 0.9069011807441711, "logps/chosen": -1.5165908336639404, "logps/rejected": -5.020061016082764, "loss": 0.6408, "nll_loss": 0.6232652068138123, "rewards/accuracies": 0.875, "rewards/chosen": -0.15165908634662628, "rewards/margins": 0.35034704208374023, "rewards/rejected": -0.5020061135292053, "step": 5114 }, { "epoch": 14.004106776180699, "grad_norm": 4.249389171600342, "learning_rate": 2.993150684931507e-07, "log_odds_chosen": 4.4803466796875, "log_odds_ratio": -0.2260405421257019, "logits/chosen": 0.9727055430412292, "logits/rejected": 0.9801596403121948, "logps/chosen": -2.1003305912017822, "logps/rejected": -6.362521171569824, "loss": 0.5637, "nll_loss": 0.5410532355308533, "rewards/accuracies": 0.875, "rewards/chosen": -0.21003304421901703, "rewards/margins": 0.4262191355228424, "rewards/rejected": -0.6362521648406982, "step": 5115 }, { "epoch": 14.00684462696783, "grad_norm": 3.4567103385925293, "learning_rate": 2.991780821917808e-07, "log_odds_chosen": 2.8224761486053467, "log_odds_ratio": -0.13501332700252533, "logits/chosen": 1.036063313484192, "logits/rejected": 1.0533335208892822, "logps/chosen": -1.0481393337249756, "logps/rejected": -3.309211254119873, "loss": 0.4343, "nll_loss": 0.42081522941589355, "rewards/accuracies": 1.0, "rewards/chosen": -0.10481393337249756, "rewards/margins": 0.2261071801185608, "rewards/rejected": -0.33092111349105835, "step": 5116 }, { "epoch": 14.009582477754963, "grad_norm": 3.8466219902038574, "learning_rate": 2.9904109589041095e-07, "log_odds_chosen": 1.4500000476837158, "log_odds_ratio": -0.3343929946422577, "logits/chosen": 1.0806936025619507, "logits/rejected": 0.9893589615821838, "logps/chosen": -1.4981818199157715, "logps/rejected": -2.779939651489258, "loss": 0.4485, "nll_loss": 0.4150683581829071, "rewards/accuracies": 1.0, "rewards/chosen": -0.14981818199157715, "rewards/margins": 0.1281757652759552, "rewards/rejected": -0.27799394726753235, "step": 5117 }, { "epoch": 14.012320328542094, "grad_norm": 3.6920573711395264, "learning_rate": 2.989041095890411e-07, "log_odds_chosen": 3.2399024963378906, "log_odds_ratio": -0.21244977414608002, "logits/chosen": 0.8508995771408081, "logits/rejected": 0.7993683218955994, "logps/chosen": -1.308323621749878, "logps/rejected": -4.319697380065918, "loss": 0.5648, "nll_loss": 0.5435669422149658, "rewards/accuracies": 1.0, "rewards/chosen": -0.13083237409591675, "rewards/margins": 0.30113738775253296, "rewards/rejected": -0.43196970224380493, "step": 5118 }, { "epoch": 14.015058179329227, "grad_norm": 4.004027366638184, "learning_rate": 2.9876712328767125e-07, "log_odds_chosen": 1.4670636653900146, "log_odds_ratio": -0.3451719284057617, "logits/chosen": 1.1272845268249512, "logits/rejected": 1.1503877639770508, "logps/chosen": -2.1449480056762695, "logps/rejected": -3.5634076595306396, "loss": 0.575, "nll_loss": 0.5405036211013794, "rewards/accuracies": 0.75, "rewards/chosen": -0.21449482440948486, "rewards/margins": 0.1418459415435791, "rewards/rejected": -0.35634076595306396, "step": 5119 }, { "epoch": 14.017796030116358, "grad_norm": 3.4625132083892822, "learning_rate": 2.9863013698630135e-07, "log_odds_chosen": 2.652517795562744, "log_odds_ratio": -0.1665417104959488, "logits/chosen": 0.9615938663482666, "logits/rejected": 0.9205830693244934, "logps/chosen": -2.023599147796631, "logps/rejected": -4.517661094665527, "loss": 0.5699, "nll_loss": 0.5532388091087341, "rewards/accuracies": 1.0, "rewards/chosen": -0.20235992968082428, "rewards/margins": 0.24940620362758636, "rewards/rejected": -0.45176610350608826, "step": 5120 }, { "epoch": 14.020533880903491, "grad_norm": 7.853316783905029, "learning_rate": 2.9849315068493145e-07, "log_odds_chosen": 3.131009817123413, "log_odds_ratio": -0.7099000215530396, "logits/chosen": 1.0648608207702637, "logits/rejected": 1.0648964643478394, "logps/chosen": -3.042914867401123, "logps/rejected": -6.005247116088867, "loss": 0.6994, "nll_loss": 0.6284089684486389, "rewards/accuracies": 0.875, "rewards/chosen": -0.3042914867401123, "rewards/margins": 0.29623323678970337, "rewards/rejected": -0.6005246639251709, "step": 5121 }, { "epoch": 14.023271731690622, "grad_norm": 3.815028667449951, "learning_rate": 2.9835616438356165e-07, "log_odds_chosen": 2.630241870880127, "log_odds_ratio": -0.19501705467700958, "logits/chosen": 0.8379420042037964, "logits/rejected": 0.883753776550293, "logps/chosen": -2.0658764839172363, "logps/rejected": -4.536081314086914, "loss": 0.5397, "nll_loss": 0.5202165842056274, "rewards/accuracies": 1.0, "rewards/chosen": -0.20658765733242035, "rewards/margins": 0.24702046811580658, "rewards/rejected": -0.45360812544822693, "step": 5122 }, { "epoch": 14.026009582477755, "grad_norm": 4.510495185852051, "learning_rate": 2.9821917808219175e-07, "log_odds_chosen": 2.357353448867798, "log_odds_ratio": -0.23096580803394318, "logits/chosen": 0.8513832092285156, "logits/rejected": 0.8121564388275146, "logps/chosen": -1.8676602840423584, "logps/rejected": -4.099851608276367, "loss": 0.5889, "nll_loss": 0.565799355506897, "rewards/accuracies": 1.0, "rewards/chosen": -0.18676602840423584, "rewards/margins": 0.223219096660614, "rewards/rejected": -0.40998512506484985, "step": 5123 }, { "epoch": 14.028747433264886, "grad_norm": 3.6370320320129395, "learning_rate": 2.980821917808219e-07, "log_odds_chosen": 3.496500015258789, "log_odds_ratio": -0.10911872982978821, "logits/chosen": 0.9001144170761108, "logits/rejected": 0.8662897348403931, "logps/chosen": -1.7319998741149902, "logps/rejected": -5.008214473724365, "loss": 0.5193, "nll_loss": 0.508421778678894, "rewards/accuracies": 1.0, "rewards/chosen": -0.17319998145103455, "rewards/margins": 0.3276214599609375, "rewards/rejected": -0.5008214116096497, "step": 5124 }, { "epoch": 14.03148528405202, "grad_norm": 4.2483906745910645, "learning_rate": 2.9794520547945206e-07, "log_odds_chosen": 1.8940577507019043, "log_odds_ratio": -0.2573050260543823, "logits/chosen": 1.186692714691162, "logits/rejected": 1.1533348560333252, "logps/chosen": -1.8300272226333618, "logps/rejected": -3.4627232551574707, "loss": 0.5418, "nll_loss": 0.5160399675369263, "rewards/accuracies": 1.0, "rewards/chosen": -0.18300274014472961, "rewards/margins": 0.16326960921287537, "rewards/rejected": -0.3462722897529602, "step": 5125 }, { "epoch": 14.03422313483915, "grad_norm": 4.890955924987793, "learning_rate": 2.978082191780822e-07, "log_odds_chosen": 1.9378747940063477, "log_odds_ratio": -0.45666438341140747, "logits/chosen": 1.439922571182251, "logits/rejected": 1.4390755891799927, "logps/chosen": -2.112029790878296, "logps/rejected": -3.9553751945495605, "loss": 0.5981, "nll_loss": 0.5524252653121948, "rewards/accuracies": 0.875, "rewards/chosen": -0.21120299398899078, "rewards/margins": 0.18433453142642975, "rewards/rejected": -0.39553752541542053, "step": 5126 }, { "epoch": 14.036960985626283, "grad_norm": 3.4789741039276123, "learning_rate": 2.976712328767123e-07, "log_odds_chosen": 2.535266160964966, "log_odds_ratio": -0.16629037261009216, "logits/chosen": 0.9713461995124817, "logits/rejected": 0.9706106185913086, "logps/chosen": -1.9114569425582886, "logps/rejected": -4.295122146606445, "loss": 0.5045, "nll_loss": 0.4878453314304352, "rewards/accuracies": 1.0, "rewards/chosen": -0.19114570319652557, "rewards/margins": 0.2383665144443512, "rewards/rejected": -0.4295122027397156, "step": 5127 }, { "epoch": 14.039698836413416, "grad_norm": 4.095282077789307, "learning_rate": 2.975342465753424e-07, "log_odds_chosen": 3.5309183597564697, "log_odds_ratio": -0.12379045784473419, "logits/chosen": 1.3526490926742554, "logits/rejected": 1.3944990634918213, "logps/chosen": -2.4854347705841064, "logps/rejected": -5.854852676391602, "loss": 0.5503, "nll_loss": 0.5379320383071899, "rewards/accuracies": 1.0, "rewards/chosen": -0.24854347109794617, "rewards/margins": 0.33694180846214294, "rewards/rejected": -0.5854852199554443, "step": 5128 }, { "epoch": 14.042436687200547, "grad_norm": 3.441492795944214, "learning_rate": 2.973972602739726e-07, "log_odds_chosen": 3.746173858642578, "log_odds_ratio": -0.1095016747713089, "logits/chosen": 1.2198965549468994, "logits/rejected": 1.2175564765930176, "logps/chosen": -1.8750603199005127, "logps/rejected": -5.407657146453857, "loss": 0.5262, "nll_loss": 0.5152267217636108, "rewards/accuracies": 1.0, "rewards/chosen": -0.18750602006912231, "rewards/margins": 0.3532596528530121, "rewards/rejected": -0.5407657027244568, "step": 5129 }, { "epoch": 14.04517453798768, "grad_norm": 4.192895412445068, "learning_rate": 2.972602739726027e-07, "log_odds_chosen": 1.3439810276031494, "log_odds_ratio": -0.27401411533355713, "logits/chosen": 1.0116169452667236, "logits/rejected": 0.9197026491165161, "logps/chosen": -1.4508452415466309, "logps/rejected": -2.612443208694458, "loss": 0.4565, "nll_loss": 0.4290768802165985, "rewards/accuracies": 1.0, "rewards/chosen": -0.14508453011512756, "rewards/margins": 0.11615978181362152, "rewards/rejected": -0.2612442970275879, "step": 5130 }, { "epoch": 14.047912388774812, "grad_norm": 3.5908985137939453, "learning_rate": 2.9712328767123286e-07, "log_odds_chosen": 3.428050994873047, "log_odds_ratio": -0.22750535607337952, "logits/chosen": 1.0788220167160034, "logits/rejected": 1.052492380142212, "logps/chosen": -1.8151212930679321, "logps/rejected": -5.0900797843933105, "loss": 0.4669, "nll_loss": 0.4441945552825928, "rewards/accuracies": 0.875, "rewards/chosen": -0.18151213228702545, "rewards/margins": 0.32749584317207336, "rewards/rejected": -0.5090079307556152, "step": 5131 }, { "epoch": 14.050650239561945, "grad_norm": 2.9810547828674316, "learning_rate": 2.96986301369863e-07, "log_odds_chosen": 4.242341041564941, "log_odds_ratio": -0.12630164623260498, "logits/chosen": 1.21259605884552, "logits/rejected": 1.2256453037261963, "logps/chosen": -1.6232479810714722, "logps/rejected": -5.678157329559326, "loss": 0.5934, "nll_loss": 0.5807690024375916, "rewards/accuracies": 1.0, "rewards/chosen": -0.16232480108737946, "rewards/margins": 0.4054909348487854, "rewards/rejected": -0.5678157210350037, "step": 5132 }, { "epoch": 14.053388090349076, "grad_norm": 5.6072306632995605, "learning_rate": 2.9684931506849317e-07, "log_odds_chosen": 1.7634460926055908, "log_odds_ratio": -0.3511413335800171, "logits/chosen": 1.1766331195831299, "logits/rejected": 1.1568504571914673, "logps/chosen": -1.8341155052185059, "logps/rejected": -3.3818612098693848, "loss": 0.5188, "nll_loss": 0.48366010189056396, "rewards/accuracies": 0.875, "rewards/chosen": -0.18341156840324402, "rewards/margins": 0.15477456152439117, "rewards/rejected": -0.3381861448287964, "step": 5133 }, { "epoch": 14.056125941136209, "grad_norm": 4.006168365478516, "learning_rate": 2.9671232876712327e-07, "log_odds_chosen": 3.574889898300171, "log_odds_ratio": -0.13509024679660797, "logits/chosen": 1.415014386177063, "logits/rejected": 1.4896175861358643, "logps/chosen": -2.6622650623321533, "logps/rejected": -6.151285648345947, "loss": 0.6334, "nll_loss": 0.6198874711990356, "rewards/accuracies": 1.0, "rewards/chosen": -0.26622650027275085, "rewards/margins": 0.3489020764827728, "rewards/rejected": -0.6151285767555237, "step": 5134 }, { "epoch": 14.05886379192334, "grad_norm": 4.388487815856934, "learning_rate": 2.9657534246575336e-07, "log_odds_chosen": 2.586939573287964, "log_odds_ratio": -0.22829997539520264, "logits/chosen": 1.0479680299758911, "logits/rejected": 0.9511071443557739, "logps/chosen": -1.5484435558319092, "logps/rejected": -3.8746180534362793, "loss": 0.4545, "nll_loss": 0.4316723942756653, "rewards/accuracies": 1.0, "rewards/chosen": -0.15484437346458435, "rewards/margins": 0.23261743783950806, "rewards/rejected": -0.3874618411064148, "step": 5135 }, { "epoch": 14.061601642710473, "grad_norm": 4.010577201843262, "learning_rate": 2.9643835616438357e-07, "log_odds_chosen": 2.092628240585327, "log_odds_ratio": -0.30700695514678955, "logits/chosen": 1.324256420135498, "logits/rejected": 1.2924431562423706, "logps/chosen": -1.6170457601547241, "logps/rejected": -3.5513386726379395, "loss": 0.5603, "nll_loss": 0.529632568359375, "rewards/accuracies": 0.875, "rewards/chosen": -0.16170458495616913, "rewards/margins": 0.19342929124832153, "rewards/rejected": -0.3551338315010071, "step": 5136 }, { "epoch": 14.064339493497604, "grad_norm": 4.486586570739746, "learning_rate": 2.9630136986301367e-07, "log_odds_chosen": 5.8075995445251465, "log_odds_ratio": -0.07560300081968307, "logits/chosen": 0.9332964420318604, "logits/rejected": 0.9228459000587463, "logps/chosen": -2.2266721725463867, "logps/rejected": -7.907005310058594, "loss": 0.6529, "nll_loss": 0.6453694105148315, "rewards/accuracies": 1.0, "rewards/chosen": -0.22266721725463867, "rewards/margins": 0.5680333375930786, "rewards/rejected": -0.7907004952430725, "step": 5137 }, { "epoch": 14.067077344284737, "grad_norm": 3.7686800956726074, "learning_rate": 2.961643835616438e-07, "log_odds_chosen": 3.3414087295532227, "log_odds_ratio": -0.19626882672309875, "logits/chosen": 0.9102169275283813, "logits/rejected": 0.8921658992767334, "logps/chosen": -1.494980812072754, "logps/rejected": -4.626764297485352, "loss": 0.4594, "nll_loss": 0.4397495985031128, "rewards/accuracies": 1.0, "rewards/chosen": -0.1494980901479721, "rewards/margins": 0.3131783604621887, "rewards/rejected": -0.46267643570899963, "step": 5138 }, { "epoch": 14.069815195071868, "grad_norm": 3.5399465560913086, "learning_rate": 2.9602739726027397e-07, "log_odds_chosen": 2.2831907272338867, "log_odds_ratio": -0.16327132284641266, "logits/chosen": 1.1467067003250122, "logits/rejected": 1.202297568321228, "logps/chosen": -2.006603956222534, "logps/rejected": -4.126889705657959, "loss": 0.636, "nll_loss": 0.6196246147155762, "rewards/accuracies": 1.0, "rewards/chosen": -0.20066040754318237, "rewards/margins": 0.21202857792377472, "rewards/rejected": -0.4126889705657959, "step": 5139 }, { "epoch": 14.072553045859001, "grad_norm": 4.972266674041748, "learning_rate": 2.958904109589041e-07, "log_odds_chosen": 1.5938973426818848, "log_odds_ratio": -0.318143367767334, "logits/chosen": 1.124147891998291, "logits/rejected": 1.1434834003448486, "logps/chosen": -2.8334543704986572, "logps/rejected": -4.294478416442871, "loss": 0.6311, "nll_loss": 0.5992587804794312, "rewards/accuracies": 1.0, "rewards/chosen": -0.28334543108940125, "rewards/margins": 0.1461023986339569, "rewards/rejected": -0.42944782972335815, "step": 5140 }, { "epoch": 14.075290896646132, "grad_norm": 3.615663528442383, "learning_rate": 2.957534246575342e-07, "log_odds_chosen": 4.413667678833008, "log_odds_ratio": -0.039291273802518845, "logits/chosen": 1.3402197360992432, "logits/rejected": 1.4229806661605835, "logps/chosen": -2.238837242126465, "logps/rejected": -6.500168323516846, "loss": 0.5331, "nll_loss": 0.5291851758956909, "rewards/accuracies": 1.0, "rewards/chosen": -0.2238837480545044, "rewards/margins": 0.42613306641578674, "rewards/rejected": -0.6500167846679688, "step": 5141 }, { "epoch": 14.078028747433265, "grad_norm": 3.32405424118042, "learning_rate": 2.956164383561643e-07, "log_odds_chosen": 3.730104684829712, "log_odds_ratio": -0.14173048734664917, "logits/chosen": 1.2849979400634766, "logits/rejected": 1.3029041290283203, "logps/chosen": -2.285092830657959, "logps/rejected": -5.878367900848389, "loss": 0.5575, "nll_loss": 0.5433129072189331, "rewards/accuracies": 1.0, "rewards/chosen": -0.2285093069076538, "rewards/margins": 0.3593275249004364, "rewards/rejected": -0.5878368616104126, "step": 5142 }, { "epoch": 14.080766598220396, "grad_norm": 4.034597396850586, "learning_rate": 2.9547945205479453e-07, "log_odds_chosen": 1.4906792640686035, "log_odds_ratio": -0.35485127568244934, "logits/chosen": 0.9978988170623779, "logits/rejected": 0.9553827047348022, "logps/chosen": -2.0043437480926514, "logps/rejected": -3.358482599258423, "loss": 0.623, "nll_loss": 0.5875172019004822, "rewards/accuracies": 0.875, "rewards/chosen": -0.2004343718290329, "rewards/margins": 0.13541388511657715, "rewards/rejected": -0.33584827184677124, "step": 5143 }, { "epoch": 14.08350444900753, "grad_norm": 2.98634934425354, "learning_rate": 2.9534246575342463e-07, "log_odds_chosen": 4.564364433288574, "log_odds_ratio": -0.15369565784931183, "logits/chosen": 1.0264723300933838, "logits/rejected": 1.0064764022827148, "logps/chosen": -1.6093268394470215, "logps/rejected": -5.963404655456543, "loss": 0.5388, "nll_loss": 0.5234630107879639, "rewards/accuracies": 1.0, "rewards/chosen": -0.16093268990516663, "rewards/margins": 0.4354077875614166, "rewards/rejected": -0.5963404774665833, "step": 5144 }, { "epoch": 14.08624229979466, "grad_norm": 4.594339847564697, "learning_rate": 2.952054794520548e-07, "log_odds_chosen": 2.614792823791504, "log_odds_ratio": -0.19299228489398956, "logits/chosen": 0.9890747666358948, "logits/rejected": 1.0169141292572021, "logps/chosen": -2.4198966026306152, "logps/rejected": -4.9175872802734375, "loss": 0.4722, "nll_loss": 0.4528995454311371, "rewards/accuracies": 1.0, "rewards/chosen": -0.24198967218399048, "rewards/margins": 0.24976904690265656, "rewards/rejected": -0.4917587339878082, "step": 5145 }, { "epoch": 14.088980150581794, "grad_norm": 3.2288818359375, "learning_rate": 2.9506849315068493e-07, "log_odds_chosen": 2.4994118213653564, "log_odds_ratio": -0.1916029304265976, "logits/chosen": 1.1821671724319458, "logits/rejected": 1.1893789768218994, "logps/chosen": -1.7792093753814697, "logps/rejected": -4.118402481079102, "loss": 0.4878, "nll_loss": 0.46862152218818665, "rewards/accuracies": 1.0, "rewards/chosen": -0.17792095243930817, "rewards/margins": 0.23391930758953094, "rewards/rejected": -0.4118402600288391, "step": 5146 }, { "epoch": 14.091718001368925, "grad_norm": 4.419345855712891, "learning_rate": 2.949315068493151e-07, "log_odds_chosen": 1.9407744407653809, "log_odds_ratio": -0.22171911597251892, "logits/chosen": 0.977928876876831, "logits/rejected": 0.9931904673576355, "logps/chosen": -2.3553988933563232, "logps/rejected": -4.149476051330566, "loss": 0.5296, "nll_loss": 0.5074295997619629, "rewards/accuracies": 1.0, "rewards/chosen": -0.23553988337516785, "rewards/margins": 0.17940771579742432, "rewards/rejected": -0.41494759917259216, "step": 5147 }, { "epoch": 14.094455852156058, "grad_norm": 9.37912654876709, "learning_rate": 2.947945205479452e-07, "log_odds_chosen": 2.564939498901367, "log_odds_ratio": -0.6412293314933777, "logits/chosen": 1.3020449876785278, "logits/rejected": 1.1967836618423462, "logps/chosen": -2.161327362060547, "logps/rejected": -4.631916046142578, "loss": 0.6197, "nll_loss": 0.5555588006973267, "rewards/accuracies": 0.875, "rewards/chosen": -0.2161327600479126, "rewards/margins": 0.24705886840820312, "rewards/rejected": -0.4631916284561157, "step": 5148 }, { "epoch": 14.097193702943189, "grad_norm": 3.9273219108581543, "learning_rate": 2.9465753424657534e-07, "log_odds_chosen": 1.9266694784164429, "log_odds_ratio": -0.28945860266685486, "logits/chosen": 1.353613018989563, "logits/rejected": 1.2938578128814697, "logps/chosen": -1.5840990543365479, "logps/rejected": -3.33986234664917, "loss": 0.4638, "nll_loss": 0.4348246157169342, "rewards/accuracies": 1.0, "rewards/chosen": -0.15840989351272583, "rewards/margins": 0.1755763590335846, "rewards/rejected": -0.3339862525463104, "step": 5149 }, { "epoch": 14.099931553730322, "grad_norm": 9.767200469970703, "learning_rate": 2.945205479452055e-07, "log_odds_chosen": 2.734494209289551, "log_odds_ratio": -0.3895163834095001, "logits/chosen": 1.059131145477295, "logits/rejected": 1.0110024213790894, "logps/chosen": -2.371840000152588, "logps/rejected": -5.0031914710998535, "loss": 0.5589, "nll_loss": 0.5199079513549805, "rewards/accuracies": 0.75, "rewards/chosen": -0.23718398809432983, "rewards/margins": 0.2631351351737976, "rewards/rejected": -0.5003191828727722, "step": 5150 }, { "epoch": 14.102669404517453, "grad_norm": 3.762523651123047, "learning_rate": 2.943835616438356e-07, "log_odds_chosen": 1.3235567808151245, "log_odds_ratio": -0.34731972217559814, "logits/chosen": 1.0416194200515747, "logits/rejected": 0.9927438497543335, "logps/chosen": -1.6061923503875732, "logps/rejected": -2.8106188774108887, "loss": 0.4368, "nll_loss": 0.40211376547813416, "rewards/accuracies": 0.875, "rewards/chosen": -0.16061924397945404, "rewards/margins": 0.12044263631105423, "rewards/rejected": -0.28106188774108887, "step": 5151 }, { "epoch": 14.105407255304586, "grad_norm": 10.64776611328125, "learning_rate": 2.9424657534246574e-07, "log_odds_chosen": 1.8236221075057983, "log_odds_ratio": -0.5211765170097351, "logits/chosen": 1.201364278793335, "logits/rejected": 1.1916736364364624, "logps/chosen": -2.1216328144073486, "logps/rejected": -3.758936882019043, "loss": 0.5978, "nll_loss": 0.5457216501235962, "rewards/accuracies": 0.875, "rewards/chosen": -0.2121632695198059, "rewards/margins": 0.16373038291931152, "rewards/rejected": -0.3758936822414398, "step": 5152 }, { "epoch": 14.108145106091717, "grad_norm": 3.349097967147827, "learning_rate": 2.941095890410959e-07, "log_odds_chosen": 4.500685691833496, "log_odds_ratio": -0.06394737958908081, "logits/chosen": 1.2575539350509644, "logits/rejected": 1.2987463474273682, "logps/chosen": -1.5529659986495972, "logps/rejected": -5.779757499694824, "loss": 0.5896, "nll_loss": 0.5831896066665649, "rewards/accuracies": 1.0, "rewards/chosen": -0.15529660880565643, "rewards/margins": 0.42267918586730957, "rewards/rejected": -0.5779757499694824, "step": 5153 }, { "epoch": 14.11088295687885, "grad_norm": 3.8215107917785645, "learning_rate": 2.9397260273972604e-07, "log_odds_chosen": 1.5634937286376953, "log_odds_ratio": -0.3081028163433075, "logits/chosen": 1.138711929321289, "logits/rejected": 1.058961033821106, "logps/chosen": -1.2853182554244995, "logps/rejected": -2.617274284362793, "loss": 0.4507, "nll_loss": 0.41986003518104553, "rewards/accuracies": 1.0, "rewards/chosen": -0.1285318285226822, "rewards/margins": 0.13319562375545502, "rewards/rejected": -0.2617274522781372, "step": 5154 }, { "epoch": 14.113620807665983, "grad_norm": 3.1679229736328125, "learning_rate": 2.9383561643835614e-07, "log_odds_chosen": 2.7309792041778564, "log_odds_ratio": -0.17260408401489258, "logits/chosen": 1.4319415092468262, "logits/rejected": 1.4269622564315796, "logps/chosen": -1.6781048774719238, "logps/rejected": -4.195370197296143, "loss": 0.5468, "nll_loss": 0.5295543670654297, "rewards/accuracies": 1.0, "rewards/chosen": -0.16781049966812134, "rewards/margins": 0.25172653794288635, "rewards/rejected": -0.4195370078086853, "step": 5155 }, { "epoch": 14.116358658453114, "grad_norm": 3.6825921535491943, "learning_rate": 2.936986301369863e-07, "log_odds_chosen": 1.4320803880691528, "log_odds_ratio": -0.3158590793609619, "logits/chosen": 1.1259117126464844, "logits/rejected": 1.111273169517517, "logps/chosen": -1.4967048168182373, "logps/rejected": -2.740596055984497, "loss": 0.5006, "nll_loss": 0.46902376413345337, "rewards/accuracies": 0.875, "rewards/chosen": -0.14967048168182373, "rewards/margins": 0.12438911199569702, "rewards/rejected": -0.27405959367752075, "step": 5156 }, { "epoch": 14.119096509240247, "grad_norm": 3.70268177986145, "learning_rate": 2.9356164383561645e-07, "log_odds_chosen": 2.217168092727661, "log_odds_ratio": -0.23359903693199158, "logits/chosen": 1.212308406829834, "logits/rejected": 1.2287843227386475, "logps/chosen": -1.8894984722137451, "logps/rejected": -3.9598453044891357, "loss": 0.5232, "nll_loss": 0.4998253285884857, "rewards/accuracies": 1.0, "rewards/chosen": -0.1889498382806778, "rewards/margins": 0.20703467726707458, "rewards/rejected": -0.3959845304489136, "step": 5157 }, { "epoch": 14.121834360027378, "grad_norm": 4.78438138961792, "learning_rate": 2.9342465753424654e-07, "log_odds_chosen": 2.1419997215270996, "log_odds_ratio": -0.18946313858032227, "logits/chosen": 0.8238360285758972, "logits/rejected": 0.7616978883743286, "logps/chosen": -1.7852239608764648, "logps/rejected": -3.740931510925293, "loss": 0.5423, "nll_loss": 0.523321807384491, "rewards/accuracies": 1.0, "rewards/chosen": -0.17852239310741425, "rewards/margins": 0.19557076692581177, "rewards/rejected": -0.3740931749343872, "step": 5158 }, { "epoch": 14.124572210814511, "grad_norm": 3.844594955444336, "learning_rate": 2.932876712328767e-07, "log_odds_chosen": 3.603700876235962, "log_odds_ratio": -0.25481361150741577, "logits/chosen": 1.0791869163513184, "logits/rejected": 1.0086865425109863, "logps/chosen": -2.037393808364868, "logps/rejected": -5.459938049316406, "loss": 0.634, "nll_loss": 0.6084937453269958, "rewards/accuracies": 0.875, "rewards/chosen": -0.20373938977718353, "rewards/margins": 0.3422544300556183, "rewards/rejected": -0.5459938049316406, "step": 5159 }, { "epoch": 14.127310061601642, "grad_norm": 5.330091953277588, "learning_rate": 2.9315068493150685e-07, "log_odds_chosen": 2.6478898525238037, "log_odds_ratio": -0.25065910816192627, "logits/chosen": 1.0547239780426025, "logits/rejected": 1.0406718254089355, "logps/chosen": -2.629951000213623, "logps/rejected": -5.135867118835449, "loss": 0.5849, "nll_loss": 0.559882402420044, "rewards/accuracies": 0.875, "rewards/chosen": -0.2629950940608978, "rewards/margins": 0.25059157609939575, "rewards/rejected": -0.513586699962616, "step": 5160 }, { "epoch": 14.130047912388775, "grad_norm": 4.219234466552734, "learning_rate": 2.9301369863013695e-07, "log_odds_chosen": 3.3313961029052734, "log_odds_ratio": -0.5025175213813782, "logits/chosen": 0.9205099940299988, "logits/rejected": 0.9048534631729126, "logps/chosen": -2.301845073699951, "logps/rejected": -5.447600364685059, "loss": 0.6161, "nll_loss": 0.565811038017273, "rewards/accuracies": 0.75, "rewards/chosen": -0.23018452525138855, "rewards/margins": 0.31457552313804626, "rewards/rejected": -0.5447601079940796, "step": 5161 }, { "epoch": 14.132785763175907, "grad_norm": 3.6169331073760986, "learning_rate": 2.928767123287671e-07, "log_odds_chosen": 1.5753726959228516, "log_odds_ratio": -0.30229803919792175, "logits/chosen": 1.4059498310089111, "logits/rejected": 1.4029922485351562, "logps/chosen": -1.9433648586273193, "logps/rejected": -3.3646068572998047, "loss": 0.4089, "nll_loss": 0.37871044874191284, "rewards/accuracies": 1.0, "rewards/chosen": -0.19433648884296417, "rewards/margins": 0.142124205827713, "rewards/rejected": -0.336460679769516, "step": 5162 }, { "epoch": 14.13552361396304, "grad_norm": 4.224146366119385, "learning_rate": 2.9273972602739725e-07, "log_odds_chosen": 2.6768269538879395, "log_odds_ratio": -0.15858031809329987, "logits/chosen": 1.081139087677002, "logits/rejected": 0.9500388503074646, "logps/chosen": -1.3421579599380493, "logps/rejected": -3.741647243499756, "loss": 0.5286, "nll_loss": 0.5127053260803223, "rewards/accuracies": 1.0, "rewards/chosen": -0.1342158019542694, "rewards/margins": 0.23994889855384827, "rewards/rejected": -0.3741647005081177, "step": 5163 }, { "epoch": 14.13826146475017, "grad_norm": 4.104914665222168, "learning_rate": 2.926027397260274e-07, "log_odds_chosen": 1.8394943475723267, "log_odds_ratio": -0.4203619360923767, "logits/chosen": 0.924920916557312, "logits/rejected": 0.8766182661056519, "logps/chosen": -2.109102249145508, "logps/rejected": -3.8323161602020264, "loss": 0.5087, "nll_loss": 0.466632217168808, "rewards/accuracies": 0.625, "rewards/chosen": -0.21091023087501526, "rewards/margins": 0.1723214089870453, "rewards/rejected": -0.38323163986206055, "step": 5164 }, { "epoch": 14.140999315537304, "grad_norm": 3.53230357170105, "learning_rate": 2.924657534246575e-07, "log_odds_chosen": 2.7294814586639404, "log_odds_ratio": -0.17870983481407166, "logits/chosen": 0.9407546520233154, "logits/rejected": 0.9642673134803772, "logps/chosen": -1.7126598358154297, "logps/rejected": -4.143791198730469, "loss": 0.5355, "nll_loss": 0.5176584124565125, "rewards/accuracies": 1.0, "rewards/chosen": -0.17126598954200745, "rewards/margins": 0.2431131899356842, "rewards/rejected": -0.41437914967536926, "step": 5165 }, { "epoch": 14.143737166324435, "grad_norm": 4.4922637939453125, "learning_rate": 2.9232876712328766e-07, "log_odds_chosen": 1.3984272480010986, "log_odds_ratio": -0.48969733715057373, "logits/chosen": 1.058120608329773, "logits/rejected": 1.053041696548462, "logps/chosen": -2.1522328853607178, "logps/rejected": -3.4772281646728516, "loss": 0.6312, "nll_loss": 0.5821824073791504, "rewards/accuracies": 0.75, "rewards/chosen": -0.2152232825756073, "rewards/margins": 0.13249953091144562, "rewards/rejected": -0.3477228283882141, "step": 5166 }, { "epoch": 14.146475017111568, "grad_norm": 3.6544077396392822, "learning_rate": 2.921917808219178e-07, "log_odds_chosen": 2.486039161682129, "log_odds_ratio": -0.18678916990756989, "logits/chosen": 0.9818723797798157, "logits/rejected": 0.955439567565918, "logps/chosen": -1.9976978302001953, "logps/rejected": -4.31058406829834, "loss": 0.5125, "nll_loss": 0.4938080310821533, "rewards/accuracies": 1.0, "rewards/chosen": -0.1997697949409485, "rewards/margins": 0.2312886118888855, "rewards/rejected": -0.431058406829834, "step": 5167 }, { "epoch": 14.149212867898699, "grad_norm": 3.461775779724121, "learning_rate": 2.920547945205479e-07, "log_odds_chosen": 3.4613547325134277, "log_odds_ratio": -0.20213885605335236, "logits/chosen": 1.0553141832351685, "logits/rejected": 1.0350476503372192, "logps/chosen": -2.10351300239563, "logps/rejected": -5.423172950744629, "loss": 0.6241, "nll_loss": 0.6039021015167236, "rewards/accuracies": 0.875, "rewards/chosen": -0.21035130321979523, "rewards/margins": 0.33196601271629333, "rewards/rejected": -0.542317271232605, "step": 5168 }, { "epoch": 14.151950718685832, "grad_norm": 5.068709373474121, "learning_rate": 2.9191780821917806e-07, "log_odds_chosen": 3.05875825881958, "log_odds_ratio": -0.31184133887290955, "logits/chosen": 1.2353160381317139, "logits/rejected": 1.2951467037200928, "logps/chosen": -2.0301995277404785, "logps/rejected": -4.858465194702148, "loss": 0.5476, "nll_loss": 0.5164142847061157, "rewards/accuracies": 0.875, "rewards/chosen": -0.20301997661590576, "rewards/margins": 0.28282660245895386, "rewards/rejected": -0.4858465790748596, "step": 5169 }, { "epoch": 14.154688569472963, "grad_norm": 5.231984615325928, "learning_rate": 2.917808219178082e-07, "log_odds_chosen": 0.8663097620010376, "log_odds_ratio": -0.5878289341926575, "logits/chosen": 0.9408458471298218, "logits/rejected": 0.8997994065284729, "logps/chosen": -1.9731117486953735, "logps/rejected": -2.683764934539795, "loss": 0.5415, "nll_loss": 0.48274844884872437, "rewards/accuracies": 0.875, "rewards/chosen": -0.1973111778497696, "rewards/margins": 0.07106530666351318, "rewards/rejected": -0.26837649941444397, "step": 5170 }, { "epoch": 14.157426420260096, "grad_norm": 3.9381794929504395, "learning_rate": 2.9164383561643836e-07, "log_odds_chosen": 1.3981139659881592, "log_odds_ratio": -0.39022374153137207, "logits/chosen": 1.0005074739456177, "logits/rejected": 0.984411358833313, "logps/chosen": -1.2900316715240479, "logps/rejected": -2.5340681076049805, "loss": 0.478, "nll_loss": 0.4389960765838623, "rewards/accuracies": 0.875, "rewards/chosen": -0.12900318205356598, "rewards/margins": 0.12440366297960281, "rewards/rejected": -0.2534068524837494, "step": 5171 }, { "epoch": 14.160164271047227, "grad_norm": 4.108050346374512, "learning_rate": 2.9150684931506846e-07, "log_odds_chosen": 2.985769033432007, "log_odds_ratio": -0.25165215134620667, "logits/chosen": 0.9720874428749084, "logits/rejected": 0.8472374081611633, "logps/chosen": -1.424445629119873, "logps/rejected": -4.208641052246094, "loss": 0.4953, "nll_loss": 0.47012048959732056, "rewards/accuracies": 0.875, "rewards/chosen": -0.14244458079338074, "rewards/margins": 0.278419554233551, "rewards/rejected": -0.42086413502693176, "step": 5172 }, { "epoch": 14.16290212183436, "grad_norm": 5.718862533569336, "learning_rate": 2.913698630136986e-07, "log_odds_chosen": 1.765581488609314, "log_odds_ratio": -0.3946736454963684, "logits/chosen": 0.9572451114654541, "logits/rejected": 0.9667937755584717, "logps/chosen": -1.7663068771362305, "logps/rejected": -3.266896963119507, "loss": 0.5056, "nll_loss": 0.46613988280296326, "rewards/accuracies": 0.875, "rewards/chosen": -0.17663069069385529, "rewards/margins": 0.15005899965763092, "rewards/rejected": -0.3266896903514862, "step": 5173 }, { "epoch": 14.165639972621491, "grad_norm": 4.070941925048828, "learning_rate": 2.9123287671232877e-07, "log_odds_chosen": 4.193853855133057, "log_odds_ratio": -0.2543714940547943, "logits/chosen": 1.0217536687850952, "logits/rejected": 1.076048731803894, "logps/chosen": -2.155466079711914, "logps/rejected": -6.138494968414307, "loss": 0.5688, "nll_loss": 0.5433377027511597, "rewards/accuracies": 0.875, "rewards/chosen": -0.2155466228723526, "rewards/margins": 0.3983028829097748, "rewards/rejected": -0.6138495206832886, "step": 5174 }, { "epoch": 14.168377823408624, "grad_norm": 4.079052448272705, "learning_rate": 2.9109589041095887e-07, "log_odds_chosen": 3.100205421447754, "log_odds_ratio": -0.17774584889411926, "logits/chosen": 1.1281977891921997, "logits/rejected": 1.0915765762329102, "logps/chosen": -2.215087890625, "logps/rejected": -5.182910442352295, "loss": 0.5883, "nll_loss": 0.5704814195632935, "rewards/accuracies": 1.0, "rewards/chosen": -0.22150878608226776, "rewards/margins": 0.2967822551727295, "rewards/rejected": -0.5182910561561584, "step": 5175 }, { "epoch": 14.171115674195756, "grad_norm": 3.7899529933929443, "learning_rate": 2.90958904109589e-07, "log_odds_chosen": 2.6273083686828613, "log_odds_ratio": -0.17973670363426208, "logits/chosen": 0.8834394216537476, "logits/rejected": 0.8738667964935303, "logps/chosen": -1.9433825016021729, "logps/rejected": -4.402463436126709, "loss": 0.5401, "nll_loss": 0.5221328735351562, "rewards/accuracies": 1.0, "rewards/chosen": -0.19433824717998505, "rewards/margins": 0.24590809643268585, "rewards/rejected": -0.4402463734149933, "step": 5176 }, { "epoch": 14.173853524982889, "grad_norm": 8.123703002929688, "learning_rate": 2.9082191780821917e-07, "log_odds_chosen": 1.6603418588638306, "log_odds_ratio": -0.4991043210029602, "logits/chosen": 1.259135365486145, "logits/rejected": 1.279112696647644, "logps/chosen": -3.026087999343872, "logps/rejected": -4.451654434204102, "loss": 0.6306, "nll_loss": 0.5806426405906677, "rewards/accuracies": 0.875, "rewards/chosen": -0.30260881781578064, "rewards/margins": 0.14255668222904205, "rewards/rejected": -0.4451655149459839, "step": 5177 }, { "epoch": 14.17659137577002, "grad_norm": 4.358767032623291, "learning_rate": 2.906849315068493e-07, "log_odds_chosen": 2.9963533878326416, "log_odds_ratio": -0.20646879076957703, "logits/chosen": 0.999332845211029, "logits/rejected": 0.9921543598175049, "logps/chosen": -2.7502012252807617, "logps/rejected": -5.614026069641113, "loss": 0.6452, "nll_loss": 0.6245278120040894, "rewards/accuracies": 1.0, "rewards/chosen": -0.27502012252807617, "rewards/margins": 0.2863825559616089, "rewards/rejected": -0.5614026188850403, "step": 5178 }, { "epoch": 14.179329226557153, "grad_norm": 3.691272020339966, "learning_rate": 2.905479452054794e-07, "log_odds_chosen": 2.29286789894104, "log_odds_ratio": -0.2038823664188385, "logits/chosen": 1.091928243637085, "logits/rejected": 1.1141351461410522, "logps/chosen": -2.3464300632476807, "logps/rejected": -4.501984596252441, "loss": 0.5951, "nll_loss": 0.5746909379959106, "rewards/accuracies": 1.0, "rewards/chosen": -0.23464301228523254, "rewards/margins": 0.21555545926094055, "rewards/rejected": -0.4501984715461731, "step": 5179 }, { "epoch": 14.182067077344286, "grad_norm": 9.547626495361328, "learning_rate": 2.9041095890410957e-07, "log_odds_chosen": 2.87288761138916, "log_odds_ratio": -0.49566006660461426, "logits/chosen": 1.3210808038711548, "logits/rejected": 1.2865883111953735, "logps/chosen": -2.029618501663208, "logps/rejected": -4.697596549987793, "loss": 0.6186, "nll_loss": 0.5690534710884094, "rewards/accuracies": 0.875, "rewards/chosen": -0.20296184718608856, "rewards/margins": 0.266797810792923, "rewards/rejected": -0.46975967288017273, "step": 5180 }, { "epoch": 14.184804928131417, "grad_norm": 3.3522932529449463, "learning_rate": 2.902739726027397e-07, "log_odds_chosen": 2.540721893310547, "log_odds_ratio": -0.12869368493556976, "logits/chosen": 0.987205982208252, "logits/rejected": 0.9883474707603455, "logps/chosen": -1.5785002708435059, "logps/rejected": -3.895615577697754, "loss": 0.536, "nll_loss": 0.5231583714485168, "rewards/accuracies": 1.0, "rewards/chosen": -0.15785004198551178, "rewards/margins": 0.23171153664588928, "rewards/rejected": -0.38956159353256226, "step": 5181 }, { "epoch": 14.18754277891855, "grad_norm": 3.8632893562316895, "learning_rate": 2.901369863013698e-07, "log_odds_chosen": 1.8985342979431152, "log_odds_ratio": -0.16175004839897156, "logits/chosen": 1.213910698890686, "logits/rejected": 1.1690446138381958, "logps/chosen": -2.0840208530426025, "logps/rejected": -3.7593812942504883, "loss": 0.5358, "nll_loss": 0.5196743011474609, "rewards/accuracies": 1.0, "rewards/chosen": -0.2084020972251892, "rewards/margins": 0.16753605008125305, "rewards/rejected": -0.37593814730644226, "step": 5182 }, { "epoch": 14.190280629705681, "grad_norm": 3.17655086517334, "learning_rate": 2.9e-07, "log_odds_chosen": 2.21073842048645, "log_odds_ratio": -0.20784996449947357, "logits/chosen": 1.2289974689483643, "logits/rejected": 1.2394096851348877, "logps/chosen": -1.4596643447875977, "logps/rejected": -3.4175007343292236, "loss": 0.4377, "nll_loss": 0.41690966486930847, "rewards/accuracies": 1.0, "rewards/chosen": -0.14596644043922424, "rewards/margins": 0.19578364491462708, "rewards/rejected": -0.3417500853538513, "step": 5183 }, { "epoch": 14.193018480492814, "grad_norm": 4.0669474601745605, "learning_rate": 2.8986301369863013e-07, "log_odds_chosen": 2.0322861671447754, "log_odds_ratio": -0.2541331648826599, "logits/chosen": 1.0857267379760742, "logits/rejected": 1.1323986053466797, "logps/chosen": -2.2861335277557373, "logps/rejected": -4.191112041473389, "loss": 0.5434, "nll_loss": 0.5179945826530457, "rewards/accuracies": 0.875, "rewards/chosen": -0.22861337661743164, "rewards/margins": 0.19049783051013947, "rewards/rejected": -0.4191111922264099, "step": 5184 }, { "epoch": 14.195756331279945, "grad_norm": 3.8977725505828857, "learning_rate": 2.897260273972603e-07, "log_odds_chosen": 4.121551513671875, "log_odds_ratio": -0.17172974348068237, "logits/chosen": 1.4063903093338013, "logits/rejected": 1.4093327522277832, "logps/chosen": -2.257784843444824, "logps/rejected": -6.213494777679443, "loss": 0.5944, "nll_loss": 0.5771872997283936, "rewards/accuracies": 1.0, "rewards/chosen": -0.2257785052061081, "rewards/margins": 0.3955709934234619, "rewards/rejected": -0.6213494539260864, "step": 5185 }, { "epoch": 14.198494182067078, "grad_norm": 4.238974571228027, "learning_rate": 2.895890410958904e-07, "log_odds_chosen": 1.9458004236221313, "log_odds_ratio": -0.5190317630767822, "logits/chosen": 0.871837854385376, "logits/rejected": 0.8895452618598938, "logps/chosen": -2.7784268856048584, "logps/rejected": -4.627493858337402, "loss": 0.7578, "nll_loss": 0.7059371471405029, "rewards/accuracies": 0.75, "rewards/chosen": -0.2778427004814148, "rewards/margins": 0.1849067062139511, "rewards/rejected": -0.4627494215965271, "step": 5186 }, { "epoch": 14.20123203285421, "grad_norm": 3.23803973197937, "learning_rate": 2.894520547945206e-07, "log_odds_chosen": 3.726264715194702, "log_odds_ratio": -0.11156336963176727, "logits/chosen": 1.0741443634033203, "logits/rejected": 1.0676939487457275, "logps/chosen": -1.8399701118469238, "logps/rejected": -5.333799839019775, "loss": 0.4855, "nll_loss": 0.4743711054325104, "rewards/accuracies": 1.0, "rewards/chosen": -0.1839970052242279, "rewards/margins": 0.34938302636146545, "rewards/rejected": -0.5333800315856934, "step": 5187 }, { "epoch": 14.203969883641342, "grad_norm": 3.3360772132873535, "learning_rate": 2.893150684931507e-07, "log_odds_chosen": 3.241520881652832, "log_odds_ratio": -0.1445472091436386, "logits/chosen": 1.4035593271255493, "logits/rejected": 1.3867833614349365, "logps/chosen": -1.8193283081054688, "logps/rejected": -4.892819404602051, "loss": 0.4867, "nll_loss": 0.47226569056510925, "rewards/accuracies": 0.875, "rewards/chosen": -0.18193283677101135, "rewards/margins": 0.3073491156101227, "rewards/rejected": -0.48928195238113403, "step": 5188 }, { "epoch": 14.206707734428473, "grad_norm": 10.379125595092773, "learning_rate": 2.891780821917808e-07, "log_odds_chosen": 0.9483718276023865, "log_odds_ratio": -0.5265225172042847, "logits/chosen": 1.0788596868515015, "logits/rejected": 0.929017961025238, "logps/chosen": -2.0485291481018066, "logps/rejected": -2.7863144874572754, "loss": 0.7236, "nll_loss": 0.6709558367729187, "rewards/accuracies": 0.75, "rewards/chosen": -0.204852893948555, "rewards/margins": 0.07377855479717255, "rewards/rejected": -0.27863144874572754, "step": 5189 }, { "epoch": 14.209445585215606, "grad_norm": 3.9536163806915283, "learning_rate": 2.8904109589041093e-07, "log_odds_chosen": 4.1110124588012695, "log_odds_ratio": -0.11520817875862122, "logits/chosen": 1.0863056182861328, "logits/rejected": 1.1610777378082275, "logps/chosen": -2.245983362197876, "logps/rejected": -6.193999290466309, "loss": 0.6414, "nll_loss": 0.6298300623893738, "rewards/accuracies": 1.0, "rewards/chosen": -0.22459834814071655, "rewards/margins": 0.3948015868663788, "rewards/rejected": -0.619399905204773, "step": 5190 }, { "epoch": 14.212183436002737, "grad_norm": 4.331177234649658, "learning_rate": 2.889041095890411e-07, "log_odds_chosen": 3.0719380378723145, "log_odds_ratio": -0.12030497193336487, "logits/chosen": 1.1283701658248901, "logits/rejected": 1.19804048538208, "logps/chosen": -1.997277855873108, "logps/rejected": -4.875664710998535, "loss": 0.5964, "nll_loss": 0.5844103097915649, "rewards/accuracies": 1.0, "rewards/chosen": -0.19972778856754303, "rewards/margins": 0.28783872723579407, "rewards/rejected": -0.4875665009021759, "step": 5191 }, { "epoch": 14.21492128678987, "grad_norm": 3.762754201889038, "learning_rate": 2.8876712328767124e-07, "log_odds_chosen": 2.533019781112671, "log_odds_ratio": -0.22931867837905884, "logits/chosen": 0.8760610818862915, "logits/rejected": 0.8293035626411438, "logps/chosen": -1.4183300733566284, "logps/rejected": -3.7175791263580322, "loss": 0.4864, "nll_loss": 0.46346646547317505, "rewards/accuracies": 1.0, "rewards/chosen": -0.14183300733566284, "rewards/margins": 0.22992490231990814, "rewards/rejected": -0.3717579245567322, "step": 5192 }, { "epoch": 14.217659137577002, "grad_norm": 4.183099746704102, "learning_rate": 2.8863013698630134e-07, "log_odds_chosen": 1.9407259225845337, "log_odds_ratio": -0.19454655051231384, "logits/chosen": 0.9420396089553833, "logits/rejected": 0.969423770904541, "logps/chosen": -1.9130640029907227, "logps/rejected": -3.669152021408081, "loss": 0.5837, "nll_loss": 0.5642440319061279, "rewards/accuracies": 1.0, "rewards/chosen": -0.19130641222000122, "rewards/margins": 0.1756087839603424, "rewards/rejected": -0.36691519618034363, "step": 5193 }, { "epoch": 14.220396988364135, "grad_norm": 3.882283926010132, "learning_rate": 2.8849315068493154e-07, "log_odds_chosen": 2.3226919174194336, "log_odds_ratio": -0.24697056412696838, "logits/chosen": 1.104509949684143, "logits/rejected": 1.1748738288879395, "logps/chosen": -2.071483612060547, "logps/rejected": -4.241832733154297, "loss": 0.6486, "nll_loss": 0.6238535046577454, "rewards/accuracies": 1.0, "rewards/chosen": -0.20714838802814484, "rewards/margins": 0.2170349359512329, "rewards/rejected": -0.42418330907821655, "step": 5194 }, { "epoch": 14.223134839151266, "grad_norm": 4.085179328918457, "learning_rate": 2.8835616438356164e-07, "log_odds_chosen": 2.1891722679138184, "log_odds_ratio": -0.20298892259597778, "logits/chosen": 1.0733574628829956, "logits/rejected": 1.02701735496521, "logps/chosen": -2.2926535606384277, "logps/rejected": -4.319031715393066, "loss": 0.5336, "nll_loss": 0.5132634043693542, "rewards/accuracies": 0.875, "rewards/chosen": -0.22926536202430725, "rewards/margins": 0.20263780653476715, "rewards/rejected": -0.4319031834602356, "step": 5195 }, { "epoch": 14.225872689938399, "grad_norm": 4.72736930847168, "learning_rate": 2.8821917808219174e-07, "log_odds_chosen": 5.440296649932861, "log_odds_ratio": -0.042099375277757645, "logits/chosen": 1.1433268785476685, "logits/rejected": 1.1705604791641235, "logps/chosen": -2.382720708847046, "logps/rejected": -7.699987411499023, "loss": 0.7991, "nll_loss": 0.7949178218841553, "rewards/accuracies": 1.0, "rewards/chosen": -0.2382720559835434, "rewards/margins": 0.5317267179489136, "rewards/rejected": -0.7699986696243286, "step": 5196 }, { "epoch": 14.22861054072553, "grad_norm": 6.51998233795166, "learning_rate": 2.880821917808219e-07, "log_odds_chosen": 1.398837924003601, "log_odds_ratio": -0.484528124332428, "logits/chosen": 0.6471518278121948, "logits/rejected": 0.6610247492790222, "logps/chosen": -2.3159847259521484, "logps/rejected": -3.636794090270996, "loss": 0.6698, "nll_loss": 0.6213325262069702, "rewards/accuracies": 0.75, "rewards/chosen": -0.23159849643707275, "rewards/margins": 0.13208091259002686, "rewards/rejected": -0.3636794090270996, "step": 5197 }, { "epoch": 14.231348391512663, "grad_norm": 3.7342634201049805, "learning_rate": 2.8794520547945205e-07, "log_odds_chosen": 3.4559412002563477, "log_odds_ratio": -0.08568011224269867, "logits/chosen": 1.057960033416748, "logits/rejected": 1.0482853651046753, "logps/chosen": -1.7423946857452393, "logps/rejected": -4.912039279937744, "loss": 0.4662, "nll_loss": 0.45759233832359314, "rewards/accuracies": 1.0, "rewards/chosen": -0.17423945665359497, "rewards/margins": 0.3169644773006439, "rewards/rejected": -0.4912039339542389, "step": 5198 }, { "epoch": 14.234086242299794, "grad_norm": 3.506054639816284, "learning_rate": 2.878082191780822e-07, "log_odds_chosen": 1.8256953954696655, "log_odds_ratio": -0.2905338704586029, "logits/chosen": 1.0497032403945923, "logits/rejected": 1.0594556331634521, "logps/chosen": -1.4042507410049438, "logps/rejected": -3.0381083488464355, "loss": 0.5496, "nll_loss": 0.5205431580543518, "rewards/accuracies": 0.875, "rewards/chosen": -0.14042508602142334, "rewards/margins": 0.16338573396205902, "rewards/rejected": -0.30381083488464355, "step": 5199 }, { "epoch": 14.236824093086927, "grad_norm": 3.515902042388916, "learning_rate": 2.876712328767123e-07, "log_odds_chosen": 4.11987829208374, "log_odds_ratio": -0.0748477503657341, "logits/chosen": 1.1998674869537354, "logits/rejected": 1.2745298147201538, "logps/chosen": -2.3296127319335938, "logps/rejected": -6.265599250793457, "loss": 0.7268, "nll_loss": 0.7193308472633362, "rewards/accuracies": 1.0, "rewards/chosen": -0.2329612821340561, "rewards/margins": 0.39359867572784424, "rewards/rejected": -0.6265599727630615, "step": 5200 }, { "epoch": 14.239561943874058, "grad_norm": 4.61491060256958, "learning_rate": 2.875342465753425e-07, "log_odds_chosen": 2.07216215133667, "log_odds_ratio": -0.17237430810928345, "logits/chosen": 1.0582565069198608, "logits/rejected": 1.0480293035507202, "logps/chosen": -2.552724838256836, "logps/rejected": -4.462372779846191, "loss": 0.5674, "nll_loss": 0.5501956939697266, "rewards/accuracies": 1.0, "rewards/chosen": -0.2552725076675415, "rewards/margins": 0.19096480309963226, "rewards/rejected": -0.44623732566833496, "step": 5201 }, { "epoch": 14.242299794661191, "grad_norm": 3.391554355621338, "learning_rate": 2.873972602739726e-07, "log_odds_chosen": 1.9242570400238037, "log_odds_ratio": -0.2062017172574997, "logits/chosen": 1.1143614053726196, "logits/rejected": 1.006048560142517, "logps/chosen": -1.3980343341827393, "logps/rejected": -3.0837748050689697, "loss": 0.4332, "nll_loss": 0.4125635623931885, "rewards/accuracies": 1.0, "rewards/chosen": -0.1398034393787384, "rewards/margins": 0.16857406497001648, "rewards/rejected": -0.3083774745464325, "step": 5202 }, { "epoch": 14.245037645448322, "grad_norm": 3.8531699180603027, "learning_rate": 2.872602739726027e-07, "log_odds_chosen": 3.6240756511688232, "log_odds_ratio": -0.14931446313858032, "logits/chosen": 1.1282050609588623, "logits/rejected": 1.0988589525222778, "logps/chosen": -2.168922185897827, "logps/rejected": -5.5844011306762695, "loss": 0.5808, "nll_loss": 0.5658295154571533, "rewards/accuracies": 1.0, "rewards/chosen": -0.21689224243164062, "rewards/margins": 0.3415479063987732, "rewards/rejected": -0.5584401488304138, "step": 5203 }, { "epoch": 14.247775496235455, "grad_norm": 4.507339000701904, "learning_rate": 2.8712328767123285e-07, "log_odds_chosen": 2.540313243865967, "log_odds_ratio": -0.3728742301464081, "logits/chosen": 0.8890022039413452, "logits/rejected": 0.8535844087600708, "logps/chosen": -2.3862032890319824, "logps/rejected": -4.7674031257629395, "loss": 0.5981, "nll_loss": 0.560860812664032, "rewards/accuracies": 0.875, "rewards/chosen": -0.2386203408241272, "rewards/margins": 0.238119974732399, "rewards/rejected": -0.4767403304576874, "step": 5204 }, { "epoch": 14.250513347022586, "grad_norm": 3.3623485565185547, "learning_rate": 2.86986301369863e-07, "log_odds_chosen": 2.537252426147461, "log_odds_ratio": -0.14549602568149567, "logits/chosen": 1.1885288953781128, "logits/rejected": 1.1905497312545776, "logps/chosen": -1.75075364112854, "logps/rejected": -4.1150078773498535, "loss": 0.4687, "nll_loss": 0.4541635513305664, "rewards/accuracies": 1.0, "rewards/chosen": -0.17507535219192505, "rewards/margins": 0.23642544448375702, "rewards/rejected": -0.4115007817745209, "step": 5205 }, { "epoch": 14.25325119780972, "grad_norm": 3.5834200382232666, "learning_rate": 2.8684931506849316e-07, "log_odds_chosen": 3.79221773147583, "log_odds_ratio": -0.10790979862213135, "logits/chosen": 1.0871496200561523, "logits/rejected": 1.048917293548584, "logps/chosen": -1.4230320453643799, "logps/rejected": -4.913013458251953, "loss": 0.62, "nll_loss": 0.6092336177825928, "rewards/accuracies": 1.0, "rewards/chosen": -0.1423031985759735, "rewards/margins": 0.34899812936782837, "rewards/rejected": -0.49130135774612427, "step": 5206 }, { "epoch": 14.255989048596852, "grad_norm": 3.56654691696167, "learning_rate": 2.8671232876712326e-07, "log_odds_chosen": 3.4170026779174805, "log_odds_ratio": -0.10666196793317795, "logits/chosen": 0.9565107822418213, "logits/rejected": 0.9673439264297485, "logps/chosen": -1.6800607442855835, "logps/rejected": -4.808160305023193, "loss": 0.577, "nll_loss": 0.5663467049598694, "rewards/accuracies": 1.0, "rewards/chosen": -0.1680060774087906, "rewards/margins": 0.3128099739551544, "rewards/rejected": -0.4808160662651062, "step": 5207 }, { "epoch": 14.258726899383984, "grad_norm": 4.27417516708374, "learning_rate": 2.865753424657534e-07, "log_odds_chosen": 2.7413389682769775, "log_odds_ratio": -0.3126806914806366, "logits/chosen": 1.497370719909668, "logits/rejected": 1.527746319770813, "logps/chosen": -1.649162769317627, "logps/rejected": -4.069308280944824, "loss": 0.4088, "nll_loss": 0.37753593921661377, "rewards/accuracies": 1.0, "rewards/chosen": -0.1649162769317627, "rewards/margins": 0.2420145869255066, "rewards/rejected": -0.4069308638572693, "step": 5208 }, { "epoch": 14.261464750171116, "grad_norm": 3.5371766090393066, "learning_rate": 2.8643835616438356e-07, "log_odds_chosen": 4.369536876678467, "log_odds_ratio": -0.17356564104557037, "logits/chosen": 1.0017750263214111, "logits/rejected": 0.997398853302002, "logps/chosen": -1.7192859649658203, "logps/rejected": -5.861565113067627, "loss": 0.592, "nll_loss": 0.5746490955352783, "rewards/accuracies": 1.0, "rewards/chosen": -0.17192859947681427, "rewards/margins": 0.4142279028892517, "rewards/rejected": -0.5861565470695496, "step": 5209 }, { "epoch": 14.264202600958248, "grad_norm": 4.993546485900879, "learning_rate": 2.8630136986301366e-07, "log_odds_chosen": 1.4012410640716553, "log_odds_ratio": -0.2756701111793518, "logits/chosen": 0.9779507517814636, "logits/rejected": 0.933205783367157, "logps/chosen": -1.8974800109863281, "logps/rejected": -3.086466073989868, "loss": 0.5734, "nll_loss": 0.5458702445030212, "rewards/accuracies": 1.0, "rewards/chosen": -0.18974798917770386, "rewards/margins": 0.11889861524105072, "rewards/rejected": -0.30864661931991577, "step": 5210 }, { "epoch": 14.26694045174538, "grad_norm": 3.7573015689849854, "learning_rate": 2.861643835616438e-07, "log_odds_chosen": 0.9878367185592651, "log_odds_ratio": -0.5212152600288391, "logits/chosen": 0.9235436320304871, "logits/rejected": 0.8710628747940063, "logps/chosen": -1.8305366039276123, "logps/rejected": -2.679892063140869, "loss": 0.4912, "nll_loss": 0.4390631318092346, "rewards/accuracies": 0.875, "rewards/chosen": -0.18305367231369019, "rewards/margins": 0.08493553847074509, "rewards/rejected": -0.26798921823501587, "step": 5211 }, { "epoch": 14.269678302532512, "grad_norm": 3.6058225631713867, "learning_rate": 2.8602739726027396e-07, "log_odds_chosen": 4.187155723571777, "log_odds_ratio": -0.10599096119403839, "logits/chosen": 1.1079754829406738, "logits/rejected": 1.1160719394683838, "logps/chosen": -1.7337088584899902, "logps/rejected": -5.5446271896362305, "loss": 0.5774, "nll_loss": 0.5667753219604492, "rewards/accuracies": 1.0, "rewards/chosen": -0.17337089776992798, "rewards/margins": 0.381091833114624, "rewards/rejected": -0.554462730884552, "step": 5212 }, { "epoch": 14.272416153319645, "grad_norm": 6.291214942932129, "learning_rate": 2.858904109589041e-07, "log_odds_chosen": 2.489659547805786, "log_odds_ratio": -0.27159571647644043, "logits/chosen": 1.1720280647277832, "logits/rejected": 1.1862139701843262, "logps/chosen": -2.659597396850586, "logps/rejected": -5.058625221252441, "loss": 0.7101, "nll_loss": 0.6828939914703369, "rewards/accuracies": 0.875, "rewards/chosen": -0.2659597396850586, "rewards/margins": 0.2399027794599533, "rewards/rejected": -0.5058624744415283, "step": 5213 }, { "epoch": 14.275154004106776, "grad_norm": 4.630910873413086, "learning_rate": 2.857534246575342e-07, "log_odds_chosen": 4.358197212219238, "log_odds_ratio": -0.23834937810897827, "logits/chosen": 0.9659653306007385, "logits/rejected": 1.0037808418273926, "logps/chosen": -1.8909246921539307, "logps/rejected": -6.117924690246582, "loss": 0.6354, "nll_loss": 0.6115354299545288, "rewards/accuracies": 1.0, "rewards/chosen": -0.1890924721956253, "rewards/margins": 0.42270001769065857, "rewards/rejected": -0.6117924451828003, "step": 5214 }, { "epoch": 14.277891854893909, "grad_norm": 4.551575183868408, "learning_rate": 2.8561643835616437e-07, "log_odds_chosen": 1.9402530193328857, "log_odds_ratio": -0.23794643580913544, "logits/chosen": 1.1736081838607788, "logits/rejected": 1.2387208938598633, "logps/chosen": -2.4101858139038086, "logps/rejected": -4.274231910705566, "loss": 0.6367, "nll_loss": 0.612912654876709, "rewards/accuracies": 1.0, "rewards/chosen": -0.24101859331130981, "rewards/margins": 0.18640458583831787, "rewards/rejected": -0.4274231791496277, "step": 5215 }, { "epoch": 14.28062970568104, "grad_norm": 3.876128911972046, "learning_rate": 2.854794520547945e-07, "log_odds_chosen": 2.722719192504883, "log_odds_ratio": -0.21269844472408295, "logits/chosen": 0.9324904680252075, "logits/rejected": 0.9056963920593262, "logps/chosen": -2.015486240386963, "logps/rejected": -4.609070777893066, "loss": 0.5903, "nll_loss": 0.5690109729766846, "rewards/accuracies": 1.0, "rewards/chosen": -0.20154863595962524, "rewards/margins": 0.2593584358692169, "rewards/rejected": -0.46090707182884216, "step": 5216 }, { "epoch": 14.283367556468173, "grad_norm": 4.919528961181641, "learning_rate": 2.853424657534246e-07, "log_odds_chosen": 0.7396966218948364, "log_odds_ratio": -0.44668716192245483, "logits/chosen": 1.205329418182373, "logits/rejected": 1.2084667682647705, "logps/chosen": -1.884069800376892, "logps/rejected": -2.5189223289489746, "loss": 0.5181, "nll_loss": 0.4734809398651123, "rewards/accuracies": 0.75, "rewards/chosen": -0.18840700387954712, "rewards/margins": 0.06348525732755661, "rewards/rejected": -0.25189223885536194, "step": 5217 }, { "epoch": 14.286105407255304, "grad_norm": 4.4103169441223145, "learning_rate": 2.852054794520548e-07, "log_odds_chosen": 2.660904884338379, "log_odds_ratio": -0.3879487216472626, "logits/chosen": 1.0066081285476685, "logits/rejected": 1.0552077293395996, "logps/chosen": -2.337049961090088, "logps/rejected": -4.8779497146606445, "loss": 0.7196, "nll_loss": 0.6808013319969177, "rewards/accuracies": 0.875, "rewards/chosen": -0.23370499908924103, "rewards/margins": 0.25408995151519775, "rewards/rejected": -0.4877949357032776, "step": 5218 }, { "epoch": 14.288843258042437, "grad_norm": 3.5874428749084473, "learning_rate": 2.850684931506849e-07, "log_odds_chosen": 2.650923490524292, "log_odds_ratio": -0.15193074941635132, "logits/chosen": 1.0844982862472534, "logits/rejected": 1.0050793886184692, "logps/chosen": -1.8127470016479492, "logps/rejected": -4.277975082397461, "loss": 0.5115, "nll_loss": 0.49632173776626587, "rewards/accuracies": 1.0, "rewards/chosen": -0.18127471208572388, "rewards/margins": 0.24652278423309326, "rewards/rejected": -0.42779749631881714, "step": 5219 }, { "epoch": 14.291581108829568, "grad_norm": 3.302058219909668, "learning_rate": 2.849315068493151e-07, "log_odds_chosen": 3.306154251098633, "log_odds_ratio": -0.135651633143425, "logits/chosen": 0.9928054809570312, "logits/rejected": 1.004929542541504, "logps/chosen": -1.7319904565811157, "logps/rejected": -4.830897808074951, "loss": 0.5306, "nll_loss": 0.5170095562934875, "rewards/accuracies": 1.0, "rewards/chosen": -0.17319905757904053, "rewards/margins": 0.3098907172679901, "rewards/rejected": -0.48308974504470825, "step": 5220 }, { "epoch": 14.294318959616701, "grad_norm": 4.153315544128418, "learning_rate": 2.8479452054794517e-07, "log_odds_chosen": 1.9340742826461792, "log_odds_ratio": -0.24950648844242096, "logits/chosen": 1.0326473712921143, "logits/rejected": 1.0544973611831665, "logps/chosen": -1.570164442062378, "logps/rejected": -3.3444900512695312, "loss": 0.5917, "nll_loss": 0.5667182803153992, "rewards/accuracies": 1.0, "rewards/chosen": -0.15701644122600555, "rewards/margins": 0.1774325668811798, "rewards/rejected": -0.33444902300834656, "step": 5221 }, { "epoch": 14.297056810403832, "grad_norm": 3.2770683765411377, "learning_rate": 2.846575342465753e-07, "log_odds_chosen": 2.212756395339966, "log_odds_ratio": -0.19637365639209747, "logits/chosen": 0.9348730444908142, "logits/rejected": 0.8985418677330017, "logps/chosen": -1.8809421062469482, "logps/rejected": -3.9063711166381836, "loss": 0.4932, "nll_loss": 0.4735143184661865, "rewards/accuracies": 0.875, "rewards/chosen": -0.18809422850608826, "rewards/margins": 0.20254291594028473, "rewards/rejected": -0.3906371295452118, "step": 5222 }, { "epoch": 14.299794661190965, "grad_norm": 4.169599533081055, "learning_rate": 2.845205479452055e-07, "log_odds_chosen": 3.0837326049804688, "log_odds_ratio": -0.14776769280433655, "logits/chosen": 1.281267762184143, "logits/rejected": 1.3061892986297607, "logps/chosen": -1.2635080814361572, "logps/rejected": -4.053842544555664, "loss": 0.492, "nll_loss": 0.4772464632987976, "rewards/accuracies": 1.0, "rewards/chosen": -0.12635082006454468, "rewards/margins": 0.27903348207473755, "rewards/rejected": -0.40538427233695984, "step": 5223 }, { "epoch": 14.302532511978097, "grad_norm": 3.8159565925598145, "learning_rate": 2.843835616438356e-07, "log_odds_chosen": 1.8700551986694336, "log_odds_ratio": -0.23033078014850616, "logits/chosen": 1.113406777381897, "logits/rejected": 1.091609239578247, "logps/chosen": -1.583052396774292, "logps/rejected": -3.2593612670898438, "loss": 0.4397, "nll_loss": 0.41670119762420654, "rewards/accuracies": 1.0, "rewards/chosen": -0.15830525755882263, "rewards/margins": 0.1676308810710907, "rewards/rejected": -0.32593613862991333, "step": 5224 }, { "epoch": 14.30527036276523, "grad_norm": 3.1062047481536865, "learning_rate": 2.842465753424658e-07, "log_odds_chosen": 2.7890419960021973, "log_odds_ratio": -0.17664878070354462, "logits/chosen": 1.0628697872161865, "logits/rejected": 1.034436821937561, "logps/chosen": -1.4965832233428955, "logps/rejected": -4.058323383331299, "loss": 0.5164, "nll_loss": 0.49872714281082153, "rewards/accuracies": 1.0, "rewards/chosen": -0.14965832233428955, "rewards/margins": 0.2561740279197693, "rewards/rejected": -0.40583235025405884, "step": 5225 }, { "epoch": 14.30800821355236, "grad_norm": 4.5946550369262695, "learning_rate": 2.841095890410959e-07, "log_odds_chosen": 1.7643758058547974, "log_odds_ratio": -0.33139413595199585, "logits/chosen": 1.2110462188720703, "logits/rejected": 1.2122670412063599, "logps/chosen": -1.968603491783142, "logps/rejected": -3.5767102241516113, "loss": 0.6169, "nll_loss": 0.5837976932525635, "rewards/accuracies": 0.875, "rewards/chosen": -0.19686034321784973, "rewards/margins": 0.1608106791973114, "rewards/rejected": -0.35767102241516113, "step": 5226 }, { "epoch": 14.310746064339494, "grad_norm": 3.947894334793091, "learning_rate": 2.8397260273972603e-07, "log_odds_chosen": 2.933417797088623, "log_odds_ratio": -0.10991925001144409, "logits/chosen": 1.2212638854980469, "logits/rejected": 1.287475347518921, "logps/chosen": -2.1181063652038574, "logps/rejected": -4.917040824890137, "loss": 0.576, "nll_loss": 0.5650317668914795, "rewards/accuracies": 1.0, "rewards/chosen": -0.2118106484413147, "rewards/margins": 0.2798934578895569, "rewards/rejected": -0.4917040765285492, "step": 5227 }, { "epoch": 14.313483915126625, "grad_norm": 3.611678123474121, "learning_rate": 2.8383561643835613e-07, "log_odds_chosen": 3.0098767280578613, "log_odds_ratio": -0.21383048593997955, "logits/chosen": 1.303725242614746, "logits/rejected": 1.2944800853729248, "logps/chosen": -1.3771497011184692, "logps/rejected": -4.162508964538574, "loss": 0.5217, "nll_loss": 0.5003201365470886, "rewards/accuracies": 1.0, "rewards/chosen": -0.13771496713161469, "rewards/margins": 0.278535932302475, "rewards/rejected": -0.41625091433525085, "step": 5228 }, { "epoch": 14.316221765913758, "grad_norm": 3.9939589500427246, "learning_rate": 2.836986301369863e-07, "log_odds_chosen": 1.7705401182174683, "log_odds_ratio": -0.2049395591020584, "logits/chosen": 1.4304592609405518, "logits/rejected": 1.4677178859710693, "logps/chosen": -2.0428390502929688, "logps/rejected": -3.5962231159210205, "loss": 0.4776, "nll_loss": 0.4571534991264343, "rewards/accuracies": 1.0, "rewards/chosen": -0.2042839229106903, "rewards/margins": 0.15533842146396637, "rewards/rejected": -0.3596222996711731, "step": 5229 }, { "epoch": 14.318959616700889, "grad_norm": 6.508525371551514, "learning_rate": 2.8356164383561644e-07, "log_odds_chosen": 1.9561834335327148, "log_odds_ratio": -0.20403426885604858, "logits/chosen": 1.1951096057891846, "logits/rejected": 1.1948838233947754, "logps/chosen": -2.027810573577881, "logps/rejected": -3.8312206268310547, "loss": 0.6266, "nll_loss": 0.6062256097793579, "rewards/accuracies": 1.0, "rewards/chosen": -0.2027810513973236, "rewards/margins": 0.18034100532531738, "rewards/rejected": -0.3831220865249634, "step": 5230 }, { "epoch": 14.321697467488022, "grad_norm": 5.027334690093994, "learning_rate": 2.8342465753424653e-07, "log_odds_chosen": 2.560784101486206, "log_odds_ratio": -0.4375148415565491, "logits/chosen": 0.9703185558319092, "logits/rejected": 0.9049403071403503, "logps/chosen": -2.2109782695770264, "logps/rejected": -4.540124893188477, "loss": 0.5304, "nll_loss": 0.48666656017303467, "rewards/accuracies": 0.875, "rewards/chosen": -0.22109782695770264, "rewards/margins": 0.23291467130184174, "rewards/rejected": -0.45401254296302795, "step": 5231 }, { "epoch": 14.324435318275153, "grad_norm": 6.383016109466553, "learning_rate": 2.8328767123287674e-07, "log_odds_chosen": 1.1954158544540405, "log_odds_ratio": -0.5468666553497314, "logits/chosen": 1.2340114116668701, "logits/rejected": 1.2236688137054443, "logps/chosen": -2.762904167175293, "logps/rejected": -3.8702592849731445, "loss": 0.6417, "nll_loss": 0.5870187282562256, "rewards/accuracies": 0.875, "rewards/chosen": -0.2762904167175293, "rewards/margins": 0.11073554307222366, "rewards/rejected": -0.38702595233917236, "step": 5232 }, { "epoch": 14.327173169062286, "grad_norm": 3.630385160446167, "learning_rate": 2.8315068493150684e-07, "log_odds_chosen": 2.3516416549682617, "log_odds_ratio": -0.21105749905109406, "logits/chosen": 1.3409976959228516, "logits/rejected": 1.3598096370697021, "logps/chosen": -1.2610243558883667, "logps/rejected": -3.290693521499634, "loss": 0.482, "nll_loss": 0.4609365463256836, "rewards/accuracies": 0.875, "rewards/chosen": -0.12610244750976562, "rewards/margins": 0.20296692848205566, "rewards/rejected": -0.3290693759918213, "step": 5233 }, { "epoch": 14.329911019849419, "grad_norm": 3.715599536895752, "learning_rate": 2.83013698630137e-07, "log_odds_chosen": 2.3768599033355713, "log_odds_ratio": -0.11784875392913818, "logits/chosen": 1.0256357192993164, "logits/rejected": 1.0092962980270386, "logps/chosen": -1.8705945014953613, "logps/rejected": -4.065009117126465, "loss": 0.5762, "nll_loss": 0.5644307136535645, "rewards/accuracies": 1.0, "rewards/chosen": -0.1870594471693039, "rewards/margins": 0.2194414585828781, "rewards/rejected": -0.4065009355545044, "step": 5234 }, { "epoch": 14.33264887063655, "grad_norm": 3.205953598022461, "learning_rate": 2.828767123287671e-07, "log_odds_chosen": 2.052700996398926, "log_odds_ratio": -0.2257215827703476, "logits/chosen": 1.2110869884490967, "logits/rejected": 1.1219844818115234, "logps/chosen": -1.7000041007995605, "logps/rejected": -3.551211357116699, "loss": 0.5545, "nll_loss": 0.5318989753723145, "rewards/accuracies": 1.0, "rewards/chosen": -0.17000041902065277, "rewards/margins": 0.18512073159217834, "rewards/rejected": -0.3551211357116699, "step": 5235 }, { "epoch": 14.335386721423683, "grad_norm": 4.780529975891113, "learning_rate": 2.8273972602739724e-07, "log_odds_chosen": 2.719973564147949, "log_odds_ratio": -0.36427366733551025, "logits/chosen": 1.2415622472763062, "logits/rejected": 1.265026330947876, "logps/chosen": -2.043779134750366, "logps/rejected": -4.671076774597168, "loss": 0.5891, "nll_loss": 0.5526356101036072, "rewards/accuracies": 0.875, "rewards/chosen": -0.2043779343366623, "rewards/margins": 0.2627297639846802, "rewards/rejected": -0.46710771322250366, "step": 5236 }, { "epoch": 14.338124572210814, "grad_norm": 10.221954345703125, "learning_rate": 2.826027397260274e-07, "log_odds_chosen": 0.9845277667045593, "log_odds_ratio": -1.2396931648254395, "logits/chosen": 0.8892520666122437, "logits/rejected": 0.8308466076850891, "logps/chosen": -2.656582832336426, "logps/rejected": -3.5512943267822266, "loss": 0.6767, "nll_loss": 0.5527771711349487, "rewards/accuracies": 0.75, "rewards/chosen": -0.26565825939178467, "rewards/margins": 0.08947117626667023, "rewards/rejected": -0.3551294207572937, "step": 5237 }, { "epoch": 14.340862422997947, "grad_norm": 6.9796576499938965, "learning_rate": 2.824657534246575e-07, "log_odds_chosen": 2.6060807704925537, "log_odds_ratio": -0.4323195219039917, "logits/chosen": 1.301512598991394, "logits/rejected": 1.3505322933197021, "logps/chosen": -2.2092130184173584, "logps/rejected": -4.733953952789307, "loss": 0.519, "nll_loss": 0.47572606801986694, "rewards/accuracies": 0.875, "rewards/chosen": -0.22092130780220032, "rewards/margins": 0.2524740993976593, "rewards/rejected": -0.4733954071998596, "step": 5238 }, { "epoch": 14.343600273785079, "grad_norm": 5.53141450881958, "learning_rate": 2.823287671232877e-07, "log_odds_chosen": 1.9039802551269531, "log_odds_ratio": -0.46514829993247986, "logits/chosen": 0.9261543154716492, "logits/rejected": 0.8196744918823242, "logps/chosen": -1.6219878196716309, "logps/rejected": -3.2679686546325684, "loss": 0.5467, "nll_loss": 0.5001469850540161, "rewards/accuracies": 0.875, "rewards/chosen": -0.16219878196716309, "rewards/margins": 0.16459806263446808, "rewards/rejected": -0.32679682970046997, "step": 5239 }, { "epoch": 14.346338124572211, "grad_norm": 3.8470616340637207, "learning_rate": 2.821917808219178e-07, "log_odds_chosen": 5.403870582580566, "log_odds_ratio": -0.11823021620512009, "logits/chosen": 1.2492399215698242, "logits/rejected": 1.2811492681503296, "logps/chosen": -2.6430487632751465, "logps/rejected": -7.9099225997924805, "loss": 0.5739, "nll_loss": 0.562027096748352, "rewards/accuracies": 1.0, "rewards/chosen": -0.26430487632751465, "rewards/margins": 0.5266873836517334, "rewards/rejected": -0.7909923195838928, "step": 5240 }, { "epoch": 14.349075975359343, "grad_norm": 3.7916922569274902, "learning_rate": 2.820547945205479e-07, "log_odds_chosen": 3.4544620513916016, "log_odds_ratio": -0.14725716412067413, "logits/chosen": 1.2016775608062744, "logits/rejected": 1.2251092195510864, "logps/chosen": -2.1762802600860596, "logps/rejected": -5.456118583679199, "loss": 0.5834, "nll_loss": 0.5686681270599365, "rewards/accuracies": 1.0, "rewards/chosen": -0.21762803196907043, "rewards/margins": 0.3279838562011719, "rewards/rejected": -0.5456118583679199, "step": 5241 }, { "epoch": 14.351813826146476, "grad_norm": 3.5877766609191895, "learning_rate": 2.8191780821917805e-07, "log_odds_chosen": 2.713805675506592, "log_odds_ratio": -0.29833468794822693, "logits/chosen": 0.738391101360321, "logits/rejected": 0.7502965927124023, "logps/chosen": -2.070157051086426, "logps/rejected": -4.654209613800049, "loss": 0.5703, "nll_loss": 0.5404953956604004, "rewards/accuracies": 0.75, "rewards/chosen": -0.20701569318771362, "rewards/margins": 0.25840526819229126, "rewards/rejected": -0.46542099118232727, "step": 5242 }, { "epoch": 14.354551676933607, "grad_norm": 3.1465303897857666, "learning_rate": 2.817808219178082e-07, "log_odds_chosen": 3.560758590698242, "log_odds_ratio": -0.1357439160346985, "logits/chosen": 1.0761350393295288, "logits/rejected": 1.0591181516647339, "logps/chosen": -1.2067710161209106, "logps/rejected": -4.410515308380127, "loss": 0.4402, "nll_loss": 0.4266359508037567, "rewards/accuracies": 1.0, "rewards/chosen": -0.12067709863185883, "rewards/margins": 0.32037439942359924, "rewards/rejected": -0.44105154275894165, "step": 5243 }, { "epoch": 14.35728952772074, "grad_norm": 3.926485300064087, "learning_rate": 2.8164383561643835e-07, "log_odds_chosen": 3.7202000617980957, "log_odds_ratio": -0.1783251315355301, "logits/chosen": 1.0717475414276123, "logits/rejected": 1.0402722358703613, "logps/chosen": -2.959552764892578, "logps/rejected": -6.637443542480469, "loss": 0.6183, "nll_loss": 0.600493311882019, "rewards/accuracies": 0.875, "rewards/chosen": -0.2959553003311157, "rewards/margins": 0.36778903007507324, "rewards/rejected": -0.663744330406189, "step": 5244 }, { "epoch": 14.360027378507871, "grad_norm": 4.7296671867370605, "learning_rate": 2.8150684931506845e-07, "log_odds_chosen": 1.8012937307357788, "log_odds_ratio": -0.23120561242103577, "logits/chosen": 0.9581986665725708, "logits/rejected": 0.8777903318405151, "logps/chosen": -2.172715187072754, "logps/rejected": -3.838776111602783, "loss": 0.6218, "nll_loss": 0.5986825227737427, "rewards/accuracies": 1.0, "rewards/chosen": -0.21727152168750763, "rewards/margins": 0.1666060835123062, "rewards/rejected": -0.38387760519981384, "step": 5245 }, { "epoch": 14.362765229295004, "grad_norm": 3.007317304611206, "learning_rate": 2.8136986301369866e-07, "log_odds_chosen": 3.7173449993133545, "log_odds_ratio": -0.10010699927806854, "logits/chosen": 1.4353001117706299, "logits/rejected": 1.4723303318023682, "logps/chosen": -1.9322091341018677, "logps/rejected": -5.431072235107422, "loss": 0.5306, "nll_loss": 0.5205991268157959, "rewards/accuracies": 1.0, "rewards/chosen": -0.19322091341018677, "rewards/margins": 0.34988635778427124, "rewards/rejected": -0.5431072115898132, "step": 5246 }, { "epoch": 14.365503080082135, "grad_norm": 3.4849956035614014, "learning_rate": 2.8123287671232876e-07, "log_odds_chosen": 2.3633689880371094, "log_odds_ratio": -0.25148335099220276, "logits/chosen": 1.1951773166656494, "logits/rejected": 1.1912903785705566, "logps/chosen": -1.791247010231018, "logps/rejected": -3.9788992404937744, "loss": 0.5273, "nll_loss": 0.5021209716796875, "rewards/accuracies": 1.0, "rewards/chosen": -0.17912469804286957, "rewards/margins": 0.2187652438879013, "rewards/rejected": -0.39788997173309326, "step": 5247 }, { "epoch": 14.368240930869268, "grad_norm": 8.834273338317871, "learning_rate": 2.8109589041095886e-07, "log_odds_chosen": 2.6886205673217773, "log_odds_ratio": -0.22549352049827576, "logits/chosen": 1.2732993364334106, "logits/rejected": 1.335319995880127, "logps/chosen": -2.698777198791504, "logps/rejected": -5.2664594650268555, "loss": 0.716, "nll_loss": 0.6934951543807983, "rewards/accuracies": 0.875, "rewards/chosen": -0.26987773180007935, "rewards/margins": 0.25676819682121277, "rewards/rejected": -0.5266459584236145, "step": 5248 }, { "epoch": 14.3709787816564, "grad_norm": 8.34950065612793, "learning_rate": 2.80958904109589e-07, "log_odds_chosen": 1.4891852140426636, "log_odds_ratio": -0.4637461006641388, "logits/chosen": 1.2156763076782227, "logits/rejected": 1.1552728414535522, "logps/chosen": -2.4242467880249023, "logps/rejected": -3.739792823791504, "loss": 0.6774, "nll_loss": 0.6310157179832458, "rewards/accuracies": 0.875, "rewards/chosen": -0.24242469668388367, "rewards/margins": 0.13155458867549896, "rewards/rejected": -0.37397927045822144, "step": 5249 }, { "epoch": 14.373716632443532, "grad_norm": 7.017276763916016, "learning_rate": 2.8082191780821916e-07, "log_odds_chosen": 2.5584559440612793, "log_odds_ratio": -0.30235445499420166, "logits/chosen": 1.1228665113449097, "logits/rejected": 1.0686085224151611, "logps/chosen": -2.5741026401519775, "logps/rejected": -4.967510223388672, "loss": 0.5542, "nll_loss": 0.5240050554275513, "rewards/accuracies": 0.875, "rewards/chosen": -0.25741028785705566, "rewards/margins": 0.23934076726436615, "rewards/rejected": -0.4967510402202606, "step": 5250 }, { "epoch": 14.376454483230663, "grad_norm": 3.924959421157837, "learning_rate": 2.806849315068493e-07, "log_odds_chosen": 2.780256748199463, "log_odds_ratio": -0.11658316850662231, "logits/chosen": 0.9232453107833862, "logits/rejected": 0.8535336256027222, "logps/chosen": -1.2058027982711792, "logps/rejected": -3.671095371246338, "loss": 0.4913, "nll_loss": 0.47964179515838623, "rewards/accuracies": 1.0, "rewards/chosen": -0.1205802708864212, "rewards/margins": 0.24652929604053497, "rewards/rejected": -0.3671095669269562, "step": 5251 }, { "epoch": 14.379192334017796, "grad_norm": 3.363225221633911, "learning_rate": 2.805479452054794e-07, "log_odds_chosen": 3.4579782485961914, "log_odds_ratio": -0.16150367259979248, "logits/chosen": 0.9880748391151428, "logits/rejected": 1.0409834384918213, "logps/chosen": -1.3963466882705688, "logps/rejected": -4.6131367683410645, "loss": 0.5804, "nll_loss": 0.5642378330230713, "rewards/accuracies": 1.0, "rewards/chosen": -0.13963466882705688, "rewards/margins": 0.3216789662837982, "rewards/rejected": -0.4613136351108551, "step": 5252 }, { "epoch": 14.381930184804927, "grad_norm": 7.019364356994629, "learning_rate": 2.804109589041096e-07, "log_odds_chosen": 1.606547474861145, "log_odds_ratio": -0.3955974578857422, "logits/chosen": 1.168423056602478, "logits/rejected": 1.1208570003509521, "logps/chosen": -2.425804615020752, "logps/rejected": -3.853037118911743, "loss": 0.605, "nll_loss": 0.5654363632202148, "rewards/accuracies": 0.875, "rewards/chosen": -0.24258047342300415, "rewards/margins": 0.14272326231002808, "rewards/rejected": -0.3853037357330322, "step": 5253 }, { "epoch": 14.38466803559206, "grad_norm": 3.7264604568481445, "learning_rate": 2.802739726027397e-07, "log_odds_chosen": 1.6527842283248901, "log_odds_ratio": -0.28864148259162903, "logits/chosen": 1.1475783586502075, "logits/rejected": 1.0896425247192383, "logps/chosen": -1.2705698013305664, "logps/rejected": -2.6960315704345703, "loss": 0.5406, "nll_loss": 0.5116888284683228, "rewards/accuracies": 1.0, "rewards/chosen": -0.12705698609352112, "rewards/margins": 0.1425461769104004, "rewards/rejected": -0.2696031332015991, "step": 5254 }, { "epoch": 14.387405886379192, "grad_norm": 4.127849578857422, "learning_rate": 2.801369863013698e-07, "log_odds_chosen": 2.5332248210906982, "log_odds_ratio": -0.1393996775150299, "logits/chosen": 1.3872430324554443, "logits/rejected": 1.4335018396377563, "logps/chosen": -1.7157286405563354, "logps/rejected": -4.038082122802734, "loss": 0.5151, "nll_loss": 0.5011383295059204, "rewards/accuracies": 1.0, "rewards/chosen": -0.17157286405563354, "rewards/margins": 0.23223532736301422, "rewards/rejected": -0.4038081765174866, "step": 5255 }, { "epoch": 14.390143737166325, "grad_norm": 6.502204418182373, "learning_rate": 2.8e-07, "log_odds_chosen": 1.971808671951294, "log_odds_ratio": -0.29348281025886536, "logits/chosen": 0.9879194498062134, "logits/rejected": 0.9813187718391418, "logps/chosen": -1.9319572448730469, "logps/rejected": -3.726942777633667, "loss": 0.5767, "nll_loss": 0.5474008321762085, "rewards/accuracies": 0.875, "rewards/chosen": -0.19319573044776917, "rewards/margins": 0.179498553276062, "rewards/rejected": -0.3726942837238312, "step": 5256 }, { "epoch": 14.392881587953456, "grad_norm": 3.9402830600738525, "learning_rate": 2.798630136986301e-07, "log_odds_chosen": 2.883664608001709, "log_odds_ratio": -0.2271217703819275, "logits/chosen": 1.1422518491744995, "logits/rejected": 1.1642601490020752, "logps/chosen": -3.11541485786438, "logps/rejected": -5.90242862701416, "loss": 0.6433, "nll_loss": 0.6206240057945251, "rewards/accuracies": 0.875, "rewards/chosen": -0.31154149770736694, "rewards/margins": 0.2787013649940491, "rewards/rejected": -0.590242862701416, "step": 5257 }, { "epoch": 14.395619438740589, "grad_norm": 3.6648614406585693, "learning_rate": 2.7972602739726027e-07, "log_odds_chosen": 3.479670524597168, "log_odds_ratio": -0.143040269613266, "logits/chosen": 1.061134934425354, "logits/rejected": 1.056298851966858, "logps/chosen": -1.9493050575256348, "logps/rejected": -5.212732315063477, "loss": 0.5791, "nll_loss": 0.5647907853126526, "rewards/accuracies": 1.0, "rewards/chosen": -0.19493049383163452, "rewards/margins": 0.32634279131889343, "rewards/rejected": -0.5212733149528503, "step": 5258 }, { "epoch": 14.39835728952772, "grad_norm": 7.679060459136963, "learning_rate": 2.7958904109589037e-07, "log_odds_chosen": 2.741283416748047, "log_odds_ratio": -0.28240880370140076, "logits/chosen": 1.5674147605895996, "logits/rejected": 1.586277723312378, "logps/chosen": -1.968705177307129, "logps/rejected": -4.472722053527832, "loss": 0.5505, "nll_loss": 0.5222969651222229, "rewards/accuracies": 0.875, "rewards/chosen": -0.19687052071094513, "rewards/margins": 0.25040170550346375, "rewards/rejected": -0.4472722113132477, "step": 5259 }, { "epoch": 14.401095140314853, "grad_norm": 3.5856916904449463, "learning_rate": 2.794520547945206e-07, "log_odds_chosen": 4.2880539894104, "log_odds_ratio": -0.15813682973384857, "logits/chosen": 1.1601568460464478, "logits/rejected": 1.1995161771774292, "logps/chosen": -2.14251971244812, "logps/rejected": -6.286440849304199, "loss": 0.6869, "nll_loss": 0.6710773706436157, "rewards/accuracies": 1.0, "rewards/chosen": -0.21425196528434753, "rewards/margins": 0.4143921434879303, "rewards/rejected": -0.6286441087722778, "step": 5260 }, { "epoch": 14.403832991101986, "grad_norm": 3.7236592769622803, "learning_rate": 2.793150684931507e-07, "log_odds_chosen": 2.9933977127075195, "log_odds_ratio": -0.19182854890823364, "logits/chosen": 1.0210258960723877, "logits/rejected": 1.032326579093933, "logps/chosen": -1.9845855236053467, "logps/rejected": -4.804706573486328, "loss": 0.5318, "nll_loss": 0.5126012563705444, "rewards/accuracies": 1.0, "rewards/chosen": -0.19845855236053467, "rewards/margins": 0.2820121645927429, "rewards/rejected": -0.4804706573486328, "step": 5261 }, { "epoch": 14.406570841889117, "grad_norm": 4.134161949157715, "learning_rate": 2.7917808219178077e-07, "log_odds_chosen": 3.3830389976501465, "log_odds_ratio": -0.29660719633102417, "logits/chosen": 0.9210391044616699, "logits/rejected": 0.9295666217803955, "logps/chosen": -1.845068097114563, "logps/rejected": -4.956910133361816, "loss": 0.5638, "nll_loss": 0.5341777801513672, "rewards/accuracies": 0.875, "rewards/chosen": -0.18450681865215302, "rewards/margins": 0.31118419766426086, "rewards/rejected": -0.4956910014152527, "step": 5262 }, { "epoch": 14.40930869267625, "grad_norm": 3.3626465797424316, "learning_rate": 2.79041095890411e-07, "log_odds_chosen": 2.4679951667785645, "log_odds_ratio": -0.19377976655960083, "logits/chosen": 1.2927546501159668, "logits/rejected": 1.2457730770111084, "logps/chosen": -2.0710268020629883, "logps/rejected": -4.373224258422852, "loss": 0.4529, "nll_loss": 0.4335366189479828, "rewards/accuracies": 1.0, "rewards/chosen": -0.2071026861667633, "rewards/margins": 0.23021972179412842, "rewards/rejected": -0.43732237815856934, "step": 5263 }, { "epoch": 14.412046543463381, "grad_norm": 4.407180309295654, "learning_rate": 2.789041095890411e-07, "log_odds_chosen": 2.26680064201355, "log_odds_ratio": -0.4168267250061035, "logits/chosen": 1.1031811237335205, "logits/rejected": 1.0495142936706543, "logps/chosen": -2.0312628746032715, "logps/rejected": -4.170316696166992, "loss": 0.6028, "nll_loss": 0.5610992908477783, "rewards/accuracies": 0.75, "rewards/chosen": -0.20312629640102386, "rewards/margins": 0.21390537917613983, "rewards/rejected": -0.4170316457748413, "step": 5264 }, { "epoch": 14.414784394250514, "grad_norm": 3.730294704437256, "learning_rate": 2.7876712328767123e-07, "log_odds_chosen": 2.4905805587768555, "log_odds_ratio": -0.1516362577676773, "logits/chosen": 1.080554723739624, "logits/rejected": 1.03281831741333, "logps/chosen": -1.5282777547836304, "logps/rejected": -3.757841110229492, "loss": 0.4703, "nll_loss": 0.45511794090270996, "rewards/accuracies": 1.0, "rewards/chosen": -0.15282778441905975, "rewards/margins": 0.2229563593864441, "rewards/rejected": -0.37578412890434265, "step": 5265 }, { "epoch": 14.417522245037645, "grad_norm": 4.365162372589111, "learning_rate": 2.7863013698630133e-07, "log_odds_chosen": 4.016317367553711, "log_odds_ratio": -0.10448053479194641, "logits/chosen": 1.2804230451583862, "logits/rejected": 1.2951321601867676, "logps/chosen": -2.324604034423828, "logps/rejected": -6.190079689025879, "loss": 0.554, "nll_loss": 0.5435448884963989, "rewards/accuracies": 1.0, "rewards/chosen": -0.2324604094028473, "rewards/margins": 0.3865475058555603, "rewards/rejected": -0.61900794506073, "step": 5266 }, { "epoch": 14.420260095824778, "grad_norm": 3.371166467666626, "learning_rate": 2.7849315068493153e-07, "log_odds_chosen": 3.656749725341797, "log_odds_ratio": -0.2822703719139099, "logits/chosen": 1.0404146909713745, "logits/rejected": 1.0621893405914307, "logps/chosen": -1.6672685146331787, "logps/rejected": -5.0648932456970215, "loss": 0.5069, "nll_loss": 0.4786507487297058, "rewards/accuracies": 1.0, "rewards/chosen": -0.16672685742378235, "rewards/margins": 0.33976247906684875, "rewards/rejected": -0.5064893364906311, "step": 5267 }, { "epoch": 14.42299794661191, "grad_norm": 3.99493145942688, "learning_rate": 2.7835616438356163e-07, "log_odds_chosen": 1.9395698308944702, "log_odds_ratio": -0.2631392180919647, "logits/chosen": 0.8305391073226929, "logits/rejected": 0.8200910687446594, "logps/chosen": -1.9571365118026733, "logps/rejected": -3.78293514251709, "loss": 0.5711, "nll_loss": 0.5447957515716553, "rewards/accuracies": 1.0, "rewards/chosen": -0.19571366906166077, "rewards/margins": 0.18257984519004822, "rewards/rejected": -0.378293514251709, "step": 5268 }, { "epoch": 14.425735797399042, "grad_norm": 15.319807052612305, "learning_rate": 2.7821917808219173e-07, "log_odds_chosen": 1.687971830368042, "log_odds_ratio": -0.4173296093940735, "logits/chosen": 1.047509789466858, "logits/rejected": 1.0048725605010986, "logps/chosen": -3.0231683254241943, "logps/rejected": -4.655549049377441, "loss": 0.7171, "nll_loss": 0.6753755807876587, "rewards/accuracies": 0.75, "rewards/chosen": -0.3023168444633484, "rewards/margins": 0.1632380485534668, "rewards/rejected": -0.4655548930168152, "step": 5269 }, { "epoch": 14.428473648186174, "grad_norm": 3.687979221343994, "learning_rate": 2.7808219178082194e-07, "log_odds_chosen": 2.7919182777404785, "log_odds_ratio": -0.20879772305488586, "logits/chosen": 1.0606309175491333, "logits/rejected": 1.093013048171997, "logps/chosen": -1.8393014669418335, "logps/rejected": -4.446798324584961, "loss": 0.4969, "nll_loss": 0.4760432243347168, "rewards/accuracies": 0.875, "rewards/chosen": -0.1839301586151123, "rewards/margins": 0.2607496976852417, "rewards/rejected": -0.444679856300354, "step": 5270 }, { "epoch": 14.431211498973306, "grad_norm": 6.2537078857421875, "learning_rate": 2.7794520547945204e-07, "log_odds_chosen": 1.050815224647522, "log_odds_ratio": -0.4478849768638611, "logits/chosen": 1.1627979278564453, "logits/rejected": 1.067970633506775, "logps/chosen": -1.786258578300476, "logps/rejected": -2.579453468322754, "loss": 0.5013, "nll_loss": 0.45652636885643005, "rewards/accuracies": 0.75, "rewards/chosen": -0.17862585186958313, "rewards/margins": 0.07931949943304062, "rewards/rejected": -0.25794535875320435, "step": 5271 }, { "epoch": 14.433949349760438, "grad_norm": 3.6778759956359863, "learning_rate": 2.778082191780822e-07, "log_odds_chosen": 4.185478210449219, "log_odds_ratio": -0.06278521567583084, "logits/chosen": 1.144089698791504, "logits/rejected": 1.1266666650772095, "logps/chosen": -2.457575798034668, "logps/rejected": -6.42849588394165, "loss": 0.4946, "nll_loss": 0.48833388090133667, "rewards/accuracies": 1.0, "rewards/chosen": -0.24575760960578918, "rewards/margins": 0.39709198474884033, "rewards/rejected": -0.6428495645523071, "step": 5272 }, { "epoch": 14.43668720054757, "grad_norm": 3.6238858699798584, "learning_rate": 2.776712328767123e-07, "log_odds_chosen": 3.447711706161499, "log_odds_ratio": -0.09426894038915634, "logits/chosen": 1.1917256116867065, "logits/rejected": 1.2178958654403687, "logps/chosen": -1.7627270221710205, "logps/rejected": -4.990792751312256, "loss": 0.5398, "nll_loss": 0.5303986072540283, "rewards/accuracies": 1.0, "rewards/chosen": -0.17627272009849548, "rewards/margins": 0.32280659675598145, "rewards/rejected": -0.49907931685447693, "step": 5273 }, { "epoch": 14.439425051334702, "grad_norm": 5.9411115646362305, "learning_rate": 2.775342465753425e-07, "log_odds_chosen": 3.0649378299713135, "log_odds_ratio": -0.19906389713287354, "logits/chosen": 0.9023391604423523, "logits/rejected": 0.8388972282409668, "logps/chosen": -1.8111546039581299, "logps/rejected": -4.667696475982666, "loss": 0.6, "nll_loss": 0.5800604224205017, "rewards/accuracies": 1.0, "rewards/chosen": -0.18111544847488403, "rewards/margins": 0.2856542468070984, "rewards/rejected": -0.4667696952819824, "step": 5274 }, { "epoch": 14.442162902121835, "grad_norm": 3.618800640106201, "learning_rate": 2.773972602739726e-07, "log_odds_chosen": 3.9867866039276123, "log_odds_ratio": -0.05324453115463257, "logits/chosen": 1.095474362373352, "logits/rejected": 1.0525996685028076, "logps/chosen": -2.30824613571167, "logps/rejected": -6.074164867401123, "loss": 0.6121, "nll_loss": 0.6068225502967834, "rewards/accuracies": 1.0, "rewards/chosen": -0.23082461953163147, "rewards/margins": 0.37659186124801636, "rewards/rejected": -0.6074165105819702, "step": 5275 }, { "epoch": 14.444900752908966, "grad_norm": 7.971253871917725, "learning_rate": 2.772602739726027e-07, "log_odds_chosen": 1.8187354803085327, "log_odds_ratio": -0.3340907096862793, "logits/chosen": 1.108373999595642, "logits/rejected": 1.083875298500061, "logps/chosen": -2.071549415588379, "logps/rejected": -3.655280113220215, "loss": 0.5085, "nll_loss": 0.4750947654247284, "rewards/accuracies": 0.875, "rewards/chosen": -0.20715494453907013, "rewards/margins": 0.15837307274341583, "rewards/rejected": -0.36552801728248596, "step": 5276 }, { "epoch": 14.447638603696099, "grad_norm": 3.832216739654541, "learning_rate": 2.771232876712329e-07, "log_odds_chosen": 2.7986788749694824, "log_odds_ratio": -0.16157551109790802, "logits/chosen": 1.2886821031570435, "logits/rejected": 1.3448963165283203, "logps/chosen": -2.300311326980591, "logps/rejected": -4.976217269897461, "loss": 0.6095, "nll_loss": 0.593376100063324, "rewards/accuracies": 1.0, "rewards/chosen": -0.2300311177968979, "rewards/margins": 0.26759058237075806, "rewards/rejected": -0.49762171506881714, "step": 5277 }, { "epoch": 14.45037645448323, "grad_norm": 3.9785802364349365, "learning_rate": 2.76986301369863e-07, "log_odds_chosen": 4.645455360412598, "log_odds_ratio": -0.12476951628923416, "logits/chosen": 1.2247233390808105, "logits/rejected": 1.2003707885742188, "logps/chosen": -1.943235158920288, "logps/rejected": -6.390249729156494, "loss": 0.6824, "nll_loss": 0.6698897480964661, "rewards/accuracies": 1.0, "rewards/chosen": -0.19432352483272552, "rewards/margins": 0.4447014629840851, "rewards/rejected": -0.6390249729156494, "step": 5278 }, { "epoch": 14.453114305270363, "grad_norm": 3.6452696323394775, "learning_rate": 2.7684931506849315e-07, "log_odds_chosen": 2.4701149463653564, "log_odds_ratio": -0.2331073135137558, "logits/chosen": 1.4037470817565918, "logits/rejected": 1.4247361421585083, "logps/chosen": -1.3906217813491821, "logps/rejected": -3.487528085708618, "loss": 0.497, "nll_loss": 0.4736758768558502, "rewards/accuracies": 0.875, "rewards/chosen": -0.13906218111515045, "rewards/margins": 0.2096906304359436, "rewards/rejected": -0.34875282645225525, "step": 5279 }, { "epoch": 14.455852156057494, "grad_norm": 3.403015613555908, "learning_rate": 2.7671232876712325e-07, "log_odds_chosen": 2.202853202819824, "log_odds_ratio": -0.24722260236740112, "logits/chosen": 0.8410512804985046, "logits/rejected": 0.775875985622406, "logps/chosen": -1.9162189960479736, "logps/rejected": -3.958300828933716, "loss": 0.6132, "nll_loss": 0.5884969830513, "rewards/accuracies": 0.875, "rewards/chosen": -0.19162189960479736, "rewards/margins": 0.20420821011066437, "rewards/rejected": -0.39583009481430054, "step": 5280 }, { "epoch": 14.458590006844627, "grad_norm": 3.759403705596924, "learning_rate": 2.7657534246575345e-07, "log_odds_chosen": 4.2017316818237305, "log_odds_ratio": -0.12295261770486832, "logits/chosen": 1.0779169797897339, "logits/rejected": 0.9887686371803284, "logps/chosen": -1.604332447052002, "logps/rejected": -5.579207420349121, "loss": 0.5429, "nll_loss": 0.530624508857727, "rewards/accuracies": 1.0, "rewards/chosen": -0.16043326258659363, "rewards/margins": 0.39748749136924744, "rewards/rejected": -0.5579207539558411, "step": 5281 }, { "epoch": 14.461327857631758, "grad_norm": 3.7013659477233887, "learning_rate": 2.7643835616438355e-07, "log_odds_chosen": 3.959216833114624, "log_odds_ratio": -0.16583558917045593, "logits/chosen": 0.9998292922973633, "logits/rejected": 0.9593525528907776, "logps/chosen": -1.8393274545669556, "logps/rejected": -5.577321529388428, "loss": 0.4965, "nll_loss": 0.47991442680358887, "rewards/accuracies": 1.0, "rewards/chosen": -0.18393275141716003, "rewards/margins": 0.3737994432449341, "rewards/rejected": -0.5577322244644165, "step": 5282 }, { "epoch": 14.464065708418891, "grad_norm": 3.6889607906341553, "learning_rate": 2.7630136986301365e-07, "log_odds_chosen": 3.3049025535583496, "log_odds_ratio": -0.11997076869010925, "logits/chosen": 0.88887619972229, "logits/rejected": 0.8312469720840454, "logps/chosen": -1.5147473812103271, "logps/rejected": -4.548235893249512, "loss": 0.4213, "nll_loss": 0.40927863121032715, "rewards/accuracies": 1.0, "rewards/chosen": -0.1514747440814972, "rewards/margins": 0.3033488988876343, "rewards/rejected": -0.4548236131668091, "step": 5283 }, { "epoch": 14.466803559206022, "grad_norm": 3.733900785446167, "learning_rate": 2.7616438356164385e-07, "log_odds_chosen": 2.2852683067321777, "log_odds_ratio": -0.22042712569236755, "logits/chosen": 1.0867923498153687, "logits/rejected": 1.1149036884307861, "logps/chosen": -1.838616967201233, "logps/rejected": -3.9154155254364014, "loss": 0.5025, "nll_loss": 0.4804535508155823, "rewards/accuracies": 1.0, "rewards/chosen": -0.18386170268058777, "rewards/margins": 0.2076798528432846, "rewards/rejected": -0.39154157042503357, "step": 5284 }, { "epoch": 14.469541409993155, "grad_norm": 3.8112761974334717, "learning_rate": 2.7602739726027395e-07, "log_odds_chosen": 2.0565505027770996, "log_odds_ratio": -0.22140449285507202, "logits/chosen": 0.9485558271408081, "logits/rejected": 0.9368953704833984, "logps/chosen": -1.8158183097839355, "logps/rejected": -3.7096400260925293, "loss": 0.7056, "nll_loss": 0.6834924817085266, "rewards/accuracies": 1.0, "rewards/chosen": -0.18158183991909027, "rewards/margins": 0.18938219547271729, "rewards/rejected": -0.37096402049064636, "step": 5285 }, { "epoch": 14.472279260780287, "grad_norm": 4.055683612823486, "learning_rate": 2.758904109589041e-07, "log_odds_chosen": 4.119621753692627, "log_odds_ratio": -0.21771976351737976, "logits/chosen": 1.3412058353424072, "logits/rejected": 1.3452140092849731, "logps/chosen": -2.05115008354187, "logps/rejected": -6.057587623596191, "loss": 0.5112, "nll_loss": 0.4894073009490967, "rewards/accuracies": 0.875, "rewards/chosen": -0.20511502027511597, "rewards/margins": 0.4006437659263611, "rewards/rejected": -0.605758786201477, "step": 5286 }, { "epoch": 14.47501711156742, "grad_norm": 4.254369258880615, "learning_rate": 2.7575342465753426e-07, "log_odds_chosen": 1.9735145568847656, "log_odds_ratio": -0.2681986689567566, "logits/chosen": 1.4023635387420654, "logits/rejected": 1.4422566890716553, "logps/chosen": -2.32886004447937, "logps/rejected": -4.193924903869629, "loss": 0.5976, "nll_loss": 0.5707765817642212, "rewards/accuracies": 1.0, "rewards/chosen": -0.23288601636886597, "rewards/margins": 0.1865064799785614, "rewards/rejected": -0.41939252614974976, "step": 5287 }, { "epoch": 14.477754962354553, "grad_norm": 5.0922770500183105, "learning_rate": 2.7561643835616436e-07, "log_odds_chosen": 4.3218302726745605, "log_odds_ratio": -0.19413568079471588, "logits/chosen": 1.049074411392212, "logits/rejected": 1.043025016784668, "logps/chosen": -2.0154366493225098, "logps/rejected": -6.211006164550781, "loss": 0.6259, "nll_loss": 0.6065179109573364, "rewards/accuracies": 1.0, "rewards/chosen": -0.2015436589717865, "rewards/margins": 0.41955694556236267, "rewards/rejected": -0.6211006045341492, "step": 5288 }, { "epoch": 14.480492813141684, "grad_norm": 3.600372791290283, "learning_rate": 2.754794520547945e-07, "log_odds_chosen": 2.689999580383301, "log_odds_ratio": -0.1732633262872696, "logits/chosen": 1.4523745775222778, "logits/rejected": 1.4307808876037598, "logps/chosen": -1.4907941818237305, "logps/rejected": -3.918360710144043, "loss": 0.4029, "nll_loss": 0.38556718826293945, "rewards/accuracies": 1.0, "rewards/chosen": -0.14907941222190857, "rewards/margins": 0.2427566796541214, "rewards/rejected": -0.3918360769748688, "step": 5289 }, { "epoch": 14.483230663928817, "grad_norm": 5.388751029968262, "learning_rate": 2.753424657534246e-07, "log_odds_chosen": 2.005038261413574, "log_odds_ratio": -0.32120007276535034, "logits/chosen": 1.1507517099380493, "logits/rejected": 1.1784554719924927, "logps/chosen": -2.8281354904174805, "logps/rejected": -4.736660957336426, "loss": 0.6, "nll_loss": 0.5679110884666443, "rewards/accuracies": 0.875, "rewards/chosen": -0.28281354904174805, "rewards/margins": 0.190852552652359, "rewards/rejected": -0.47366610169410706, "step": 5290 }, { "epoch": 14.485968514715948, "grad_norm": 3.641838788986206, "learning_rate": 2.752054794520548e-07, "log_odds_chosen": 2.410613775253296, "log_odds_ratio": -0.18266619741916656, "logits/chosen": 1.1203680038452148, "logits/rejected": 1.088395118713379, "logps/chosen": -2.0930566787719727, "logps/rejected": -4.374013900756836, "loss": 0.5179, "nll_loss": 0.49964141845703125, "rewards/accuracies": 1.0, "rewards/chosen": -0.20930567383766174, "rewards/margins": 0.22809572517871857, "rewards/rejected": -0.4374014139175415, "step": 5291 }, { "epoch": 14.48870636550308, "grad_norm": 4.339801788330078, "learning_rate": 2.750684931506849e-07, "log_odds_chosen": 2.092489242553711, "log_odds_ratio": -0.3003470301628113, "logits/chosen": 1.1375564336776733, "logits/rejected": 1.22312331199646, "logps/chosen": -2.511944055557251, "logps/rejected": -4.485535144805908, "loss": 0.543, "nll_loss": 0.5129319429397583, "rewards/accuracies": 0.875, "rewards/chosen": -0.25119441747665405, "rewards/margins": 0.1973590850830078, "rewards/rejected": -0.44855350255966187, "step": 5292 }, { "epoch": 14.491444216290212, "grad_norm": 4.059738636016846, "learning_rate": 2.7493150684931506e-07, "log_odds_chosen": 2.750704288482666, "log_odds_ratio": -0.3650977313518524, "logits/chosen": 1.1424912214279175, "logits/rejected": 1.0562498569488525, "logps/chosen": -1.7509992122650146, "logps/rejected": -4.311062812805176, "loss": 0.5472, "nll_loss": 0.5107322335243225, "rewards/accuracies": 0.875, "rewards/chosen": -0.1750999391078949, "rewards/margins": 0.2560063600540161, "rewards/rejected": -0.4311062693595886, "step": 5293 }, { "epoch": 14.494182067077345, "grad_norm": 3.7200214862823486, "learning_rate": 2.747945205479452e-07, "log_odds_chosen": 4.181704521179199, "log_odds_ratio": -0.11768439412117004, "logits/chosen": 0.8072817921638489, "logits/rejected": 0.7231205701828003, "logps/chosen": -2.264744997024536, "logps/rejected": -6.318849563598633, "loss": 0.6915, "nll_loss": 0.6797354817390442, "rewards/accuracies": 1.0, "rewards/chosen": -0.22647449374198914, "rewards/margins": 0.405410498380661, "rewards/rejected": -0.6318849921226501, "step": 5294 }, { "epoch": 14.496919917864476, "grad_norm": 3.3842082023620605, "learning_rate": 2.746575342465753e-07, "log_odds_chosen": 3.3258109092712402, "log_odds_ratio": -0.11882093548774719, "logits/chosen": 0.879358172416687, "logits/rejected": 0.8698019981384277, "logps/chosen": -1.8764474391937256, "logps/rejected": -4.989083290100098, "loss": 0.5769, "nll_loss": 0.564974308013916, "rewards/accuracies": 1.0, "rewards/chosen": -0.18764474987983704, "rewards/margins": 0.3112635612487793, "rewards/rejected": -0.49890831112861633, "step": 5295 }, { "epoch": 14.499657768651609, "grad_norm": 4.273636341094971, "learning_rate": 2.7452054794520547e-07, "log_odds_chosen": 1.3786039352416992, "log_odds_ratio": -0.3220551013946533, "logits/chosen": 1.130483865737915, "logits/rejected": 1.0934243202209473, "logps/chosen": -1.6066136360168457, "logps/rejected": -2.8403730392456055, "loss": 0.4556, "nll_loss": 0.42343324422836304, "rewards/accuracies": 1.0, "rewards/chosen": -0.16066135466098785, "rewards/margins": 0.12337595224380493, "rewards/rejected": -0.2840372920036316, "step": 5296 }, { "epoch": 14.50239561943874, "grad_norm": 4.519249439239502, "learning_rate": 2.7438356164383557e-07, "log_odds_chosen": 2.618856430053711, "log_odds_ratio": -0.5015531182289124, "logits/chosen": 0.9852739572525024, "logits/rejected": 0.9844447374343872, "logps/chosen": -2.2509689331054688, "logps/rejected": -4.625285625457764, "loss": 0.6373, "nll_loss": 0.5871934294700623, "rewards/accuracies": 0.875, "rewards/chosen": -0.2250968962907791, "rewards/margins": 0.23743164539337158, "rewards/rejected": -0.4625285565853119, "step": 5297 }, { "epoch": 14.505133470225873, "grad_norm": 7.544943809509277, "learning_rate": 2.7424657534246577e-07, "log_odds_chosen": 2.8453571796417236, "log_odds_ratio": -0.29342684149742126, "logits/chosen": 0.837252140045166, "logits/rejected": 0.7581889033317566, "logps/chosen": -2.1411116123199463, "logps/rejected": -4.83029317855835, "loss": 0.6151, "nll_loss": 0.585751473903656, "rewards/accuracies": 0.875, "rewards/chosen": -0.21411114931106567, "rewards/margins": 0.2689181864261627, "rewards/rejected": -0.4830293357372284, "step": 5298 }, { "epoch": 14.507871321013004, "grad_norm": 3.4908969402313232, "learning_rate": 2.7410958904109587e-07, "log_odds_chosen": 1.9284225702285767, "log_odds_ratio": -0.24494600296020508, "logits/chosen": 1.0976622104644775, "logits/rejected": 1.0773158073425293, "logps/chosen": -2.466670036315918, "logps/rejected": -4.223666667938232, "loss": 0.5869, "nll_loss": 0.5624339580535889, "rewards/accuracies": 1.0, "rewards/chosen": -0.24666699767112732, "rewards/margins": 0.1756996214389801, "rewards/rejected": -0.4223666787147522, "step": 5299 }, { "epoch": 14.510609171800137, "grad_norm": 3.9004034996032715, "learning_rate": 2.73972602739726e-07, "log_odds_chosen": 1.738990068435669, "log_odds_ratio": -0.29557865858078003, "logits/chosen": 1.0016885995864868, "logits/rejected": 0.9431138038635254, "logps/chosen": -2.4132251739501953, "logps/rejected": -4.057426452636719, "loss": 0.6688, "nll_loss": 0.6392893195152283, "rewards/accuracies": 0.75, "rewards/chosen": -0.24132251739501953, "rewards/margins": 0.16442015767097473, "rewards/rejected": -0.4057426452636719, "step": 5300 }, { "epoch": 14.513347022587268, "grad_norm": 3.327948808670044, "learning_rate": 2.738356164383562e-07, "log_odds_chosen": 2.209017038345337, "log_odds_ratio": -0.23018638789653778, "logits/chosen": 1.0369434356689453, "logits/rejected": 1.0938013792037964, "logps/chosen": -1.8014758825302124, "logps/rejected": -3.8787612915039062, "loss": 0.4691, "nll_loss": 0.44604310393333435, "rewards/accuracies": 1.0, "rewards/chosen": -0.18014758825302124, "rewards/margins": 0.2077285200357437, "rewards/rejected": -0.38787609338760376, "step": 5301 }, { "epoch": 14.516084873374401, "grad_norm": 4.301425457000732, "learning_rate": 2.7369863013698627e-07, "log_odds_chosen": 1.8726083040237427, "log_odds_ratio": -0.2800540328025818, "logits/chosen": 0.8824280500411987, "logits/rejected": 0.8844760060310364, "logps/chosen": -1.689610481262207, "logps/rejected": -3.385669708251953, "loss": 0.5585, "nll_loss": 0.5304981470108032, "rewards/accuracies": 0.875, "rewards/chosen": -0.1689610481262207, "rewards/margins": 0.16960595548152924, "rewards/rejected": -0.33856701850891113, "step": 5302 }, { "epoch": 14.518822724161533, "grad_norm": 4.89041805267334, "learning_rate": 2.735616438356164e-07, "log_odds_chosen": 1.9281895160675049, "log_odds_ratio": -0.26140934228897095, "logits/chosen": 0.9858217239379883, "logits/rejected": 0.9754975438117981, "logps/chosen": -2.19443416595459, "logps/rejected": -3.958138942718506, "loss": 0.4743, "nll_loss": 0.4481937885284424, "rewards/accuracies": 0.875, "rewards/chosen": -0.2194434106349945, "rewards/margins": 0.17637048661708832, "rewards/rejected": -0.395813912153244, "step": 5303 }, { "epoch": 14.521560574948666, "grad_norm": 3.7242844104766846, "learning_rate": 2.734246575342465e-07, "log_odds_chosen": 2.014557361602783, "log_odds_ratio": -0.16848526895046234, "logits/chosen": 1.1241066455841064, "logits/rejected": 1.1135332584381104, "logps/chosen": -1.8279238939285278, "logps/rejected": -3.68515682220459, "loss": 0.5409, "nll_loss": 0.5240603685379028, "rewards/accuracies": 1.0, "rewards/chosen": -0.18279239535331726, "rewards/margins": 0.18572331964969635, "rewards/rejected": -0.3685157299041748, "step": 5304 }, { "epoch": 14.524298425735797, "grad_norm": 3.8521640300750732, "learning_rate": 2.7328767123287673e-07, "log_odds_chosen": 2.5237393379211426, "log_odds_ratio": -0.14500829577445984, "logits/chosen": 0.9425749778747559, "logits/rejected": 0.8917997479438782, "logps/chosen": -1.7214927673339844, "logps/rejected": -4.060137748718262, "loss": 0.4705, "nll_loss": 0.4559832215309143, "rewards/accuracies": 1.0, "rewards/chosen": -0.17214925587177277, "rewards/margins": 0.23386454582214355, "rewards/rejected": -0.4060137867927551, "step": 5305 }, { "epoch": 14.52703627652293, "grad_norm": 8.065403938293457, "learning_rate": 2.7315068493150683e-07, "log_odds_chosen": 2.197350263595581, "log_odds_ratio": -0.27680641412734985, "logits/chosen": 1.082592487335205, "logits/rejected": 1.0659312009811401, "logps/chosen": -2.254225254058838, "logps/rejected": -4.367764472961426, "loss": 0.6121, "nll_loss": 0.5843845009803772, "rewards/accuracies": 1.0, "rewards/chosen": -0.22542253136634827, "rewards/margins": 0.21135395765304565, "rewards/rejected": -0.43677645921707153, "step": 5306 }, { "epoch": 14.529774127310061, "grad_norm": 3.376467704772949, "learning_rate": 2.73013698630137e-07, "log_odds_chosen": 2.5958807468414307, "log_odds_ratio": -0.15172046422958374, "logits/chosen": 1.128435492515564, "logits/rejected": 1.1395586729049683, "logps/chosen": -2.208165407180786, "logps/rejected": -4.680902004241943, "loss": 0.6021, "nll_loss": 0.5869430303573608, "rewards/accuracies": 1.0, "rewards/chosen": -0.22081655263900757, "rewards/margins": 0.24727368354797363, "rewards/rejected": -0.4680902063846588, "step": 5307 }, { "epoch": 14.532511978097194, "grad_norm": 6.138041019439697, "learning_rate": 2.7287671232876713e-07, "log_odds_chosen": 2.311312437057495, "log_odds_ratio": -0.44501155614852905, "logits/chosen": 0.9612951278686523, "logits/rejected": 0.8900938034057617, "logps/chosen": -2.5323102474212646, "logps/rejected": -4.723440170288086, "loss": 0.6491, "nll_loss": 0.6046410799026489, "rewards/accuracies": 0.875, "rewards/chosen": -0.2532310485839844, "rewards/margins": 0.21911297738552094, "rewards/rejected": -0.4723440110683441, "step": 5308 }, { "epoch": 14.535249828884325, "grad_norm": 3.4174153804779053, "learning_rate": 2.7273972602739723e-07, "log_odds_chosen": 3.2292354106903076, "log_odds_ratio": -0.17761118710041046, "logits/chosen": 1.152836561203003, "logits/rejected": 1.1234893798828125, "logps/chosen": -1.8958189487457275, "logps/rejected": -5.000593185424805, "loss": 0.5341, "nll_loss": 0.5163082480430603, "rewards/accuracies": 0.875, "rewards/chosen": -0.18958190083503723, "rewards/margins": 0.31047743558883667, "rewards/rejected": -0.5000593066215515, "step": 5309 }, { "epoch": 14.537987679671458, "grad_norm": 5.695704936981201, "learning_rate": 2.726027397260274e-07, "log_odds_chosen": 3.302316665649414, "log_odds_ratio": -0.2818940281867981, "logits/chosen": 1.0839277505874634, "logits/rejected": 0.9666133522987366, "logps/chosen": -2.544055223464966, "logps/rejected": -5.746514797210693, "loss": 0.6268, "nll_loss": 0.5986587405204773, "rewards/accuracies": 0.875, "rewards/chosen": -0.25440555810928345, "rewards/margins": 0.3202459216117859, "rewards/rejected": -0.5746514797210693, "step": 5310 }, { "epoch": 14.54072553045859, "grad_norm": 4.696066856384277, "learning_rate": 2.724657534246575e-07, "log_odds_chosen": 2.860179901123047, "log_odds_ratio": -0.3719175159931183, "logits/chosen": 1.0395233631134033, "logits/rejected": 0.9996241927146912, "logps/chosen": -1.9169015884399414, "logps/rejected": -4.658914566040039, "loss": 0.4903, "nll_loss": 0.4531075060367584, "rewards/accuracies": 0.75, "rewards/chosen": -0.19169014692306519, "rewards/margins": 0.27420130372047424, "rewards/rejected": -0.46589145064353943, "step": 5311 }, { "epoch": 14.543463381245722, "grad_norm": 10.990830421447754, "learning_rate": 2.723287671232877e-07, "log_odds_chosen": 5.30982780456543, "log_odds_ratio": -0.25708284974098206, "logits/chosen": 0.9076915383338928, "logits/rejected": 0.8756642937660217, "logps/chosen": -2.03007173538208, "logps/rejected": -7.119586944580078, "loss": 0.7302, "nll_loss": 0.7044445276260376, "rewards/accuracies": 0.875, "rewards/chosen": -0.20300716161727905, "rewards/margins": 0.5089515447616577, "rewards/rejected": -0.711958646774292, "step": 5312 }, { "epoch": 14.546201232032853, "grad_norm": 3.5139315128326416, "learning_rate": 2.721917808219178e-07, "log_odds_chosen": 3.3591294288635254, "log_odds_ratio": -0.08960311114788055, "logits/chosen": 0.9666478037834167, "logits/rejected": 0.9582332372665405, "logps/chosen": -2.0859642028808594, "logps/rejected": -5.276935577392578, "loss": 0.5878, "nll_loss": 0.5787903070449829, "rewards/accuracies": 1.0, "rewards/chosen": -0.20859640836715698, "rewards/margins": 0.319097101688385, "rewards/rejected": -0.527693510055542, "step": 5313 }, { "epoch": 14.548939082819986, "grad_norm": 6.775388717651367, "learning_rate": 2.7205479452054794e-07, "log_odds_chosen": 2.2385239601135254, "log_odds_ratio": -0.4729244112968445, "logits/chosen": 1.1581133604049683, "logits/rejected": 1.1301814317703247, "logps/chosen": -2.603313446044922, "logps/rejected": -4.753689289093018, "loss": 0.6545, "nll_loss": 0.6072547435760498, "rewards/accuracies": 0.75, "rewards/chosen": -0.26033133268356323, "rewards/margins": 0.21503756940364838, "rewards/rejected": -0.4753689169883728, "step": 5314 }, { "epoch": 14.55167693360712, "grad_norm": 3.4513919353485107, "learning_rate": 2.719178082191781e-07, "log_odds_chosen": 2.742396354675293, "log_odds_ratio": -0.14088241755962372, "logits/chosen": 0.8384867310523987, "logits/rejected": 0.8303989171981812, "logps/chosen": -1.381068468093872, "logps/rejected": -3.825073719024658, "loss": 0.3914, "nll_loss": 0.3772639334201813, "rewards/accuracies": 1.0, "rewards/chosen": -0.1381068378686905, "rewards/margins": 0.24440054595470428, "rewards/rejected": -0.3825073838233948, "step": 5315 }, { "epoch": 14.55441478439425, "grad_norm": 3.604051351547241, "learning_rate": 2.717808219178082e-07, "log_odds_chosen": 3.089412212371826, "log_odds_ratio": -0.1538911610841751, "logits/chosen": 1.0422286987304688, "logits/rejected": 1.097962498664856, "logps/chosen": -2.2337646484375, "logps/rejected": -5.194177150726318, "loss": 0.75, "nll_loss": 0.7346251010894775, "rewards/accuracies": 0.875, "rewards/chosen": -0.22337648272514343, "rewards/margins": 0.29604119062423706, "rewards/rejected": -0.5194177031517029, "step": 5316 }, { "epoch": 14.557152635181383, "grad_norm": 9.378100395202637, "learning_rate": 2.7164383561643834e-07, "log_odds_chosen": 2.9153761863708496, "log_odds_ratio": -0.16901808977127075, "logits/chosen": 1.1322625875473022, "logits/rejected": 1.089741826057434, "logps/chosen": -2.1169655323028564, "logps/rejected": -4.789677619934082, "loss": 0.5619, "nll_loss": 0.5450155735015869, "rewards/accuracies": 1.0, "rewards/chosen": -0.2116965502500534, "rewards/margins": 0.26727116107940674, "rewards/rejected": -0.47896772623062134, "step": 5317 }, { "epoch": 14.559890485968515, "grad_norm": 3.1556193828582764, "learning_rate": 2.7150684931506844e-07, "log_odds_chosen": 4.178561687469482, "log_odds_ratio": -0.09424406290054321, "logits/chosen": 1.092571496963501, "logits/rejected": 1.1295253038406372, "logps/chosen": -1.8074440956115723, "logps/rejected": -5.7664642333984375, "loss": 0.5388, "nll_loss": 0.529367208480835, "rewards/accuracies": 1.0, "rewards/chosen": -0.18074443936347961, "rewards/margins": 0.39590203762054443, "rewards/rejected": -0.5766464471817017, "step": 5318 }, { "epoch": 14.562628336755647, "grad_norm": 3.5228521823883057, "learning_rate": 2.7136986301369865e-07, "log_odds_chosen": 2.591796636581421, "log_odds_ratio": -0.22299735248088837, "logits/chosen": 1.2622129917144775, "logits/rejected": 1.2515323162078857, "logps/chosen": -1.7624616622924805, "logps/rejected": -4.196745872497559, "loss": 0.5255, "nll_loss": 0.5031591057777405, "rewards/accuracies": 1.0, "rewards/chosen": -0.17624616622924805, "rewards/margins": 0.24342842400074005, "rewards/rejected": -0.4196745753288269, "step": 5319 }, { "epoch": 14.565366187542779, "grad_norm": 3.6681034564971924, "learning_rate": 2.7123287671232875e-07, "log_odds_chosen": 2.1051723957061768, "log_odds_ratio": -0.19987261295318604, "logits/chosen": 0.7724028825759888, "logits/rejected": 0.727785587310791, "logps/chosen": -1.5041379928588867, "logps/rejected": -3.4044530391693115, "loss": 0.5067, "nll_loss": 0.486664742231369, "rewards/accuracies": 1.0, "rewards/chosen": -0.15041381120681763, "rewards/margins": 0.1900315135717392, "rewards/rejected": -0.340445339679718, "step": 5320 }, { "epoch": 14.568104038329912, "grad_norm": 3.5674757957458496, "learning_rate": 2.710958904109589e-07, "log_odds_chosen": 3.4734559059143066, "log_odds_ratio": -0.19337673485279083, "logits/chosen": 0.9709717631340027, "logits/rejected": 0.9377899169921875, "logps/chosen": -2.0142364501953125, "logps/rejected": -5.3558478355407715, "loss": 0.5556, "nll_loss": 0.5363035202026367, "rewards/accuracies": 1.0, "rewards/chosen": -0.20142364501953125, "rewards/margins": 0.33416110277175903, "rewards/rejected": -0.5355846881866455, "step": 5321 }, { "epoch": 14.570841889117043, "grad_norm": 3.7646536827087402, "learning_rate": 2.7095890410958905e-07, "log_odds_chosen": 1.8273134231567383, "log_odds_ratio": -0.21964667737483978, "logits/chosen": 0.9924793243408203, "logits/rejected": 0.9751824736595154, "logps/chosen": -1.192836880683899, "logps/rejected": -2.7262496948242188, "loss": 0.5299, "nll_loss": 0.5079150795936584, "rewards/accuracies": 1.0, "rewards/chosen": -0.11928368359804153, "rewards/margins": 0.15334129333496094, "rewards/rejected": -0.2726249694824219, "step": 5322 }, { "epoch": 14.573579739904176, "grad_norm": 3.1484384536743164, "learning_rate": 2.7082191780821915e-07, "log_odds_chosen": 3.929478645324707, "log_odds_ratio": -0.06692530959844589, "logits/chosen": 1.0436699390411377, "logits/rejected": 1.081608533859253, "logps/chosen": -1.5443689823150635, "logps/rejected": -5.198373794555664, "loss": 0.636, "nll_loss": 0.6293573379516602, "rewards/accuracies": 1.0, "rewards/chosen": -0.1544368863105774, "rewards/margins": 0.3654004633426666, "rewards/rejected": -0.5198373794555664, "step": 5323 }, { "epoch": 14.576317590691307, "grad_norm": 3.3115921020507812, "learning_rate": 2.706849315068493e-07, "log_odds_chosen": 3.6209797859191895, "log_odds_ratio": -0.10435085743665695, "logits/chosen": 0.9569743275642395, "logits/rejected": 0.9810909628868103, "logps/chosen": -1.119489073753357, "logps/rejected": -4.244541168212891, "loss": 0.4559, "nll_loss": 0.44548746943473816, "rewards/accuracies": 1.0, "rewards/chosen": -0.1119489073753357, "rewards/margins": 0.31250521540641785, "rewards/rejected": -0.42445409297943115, "step": 5324 }, { "epoch": 14.57905544147844, "grad_norm": 9.500782012939453, "learning_rate": 2.7054794520547945e-07, "log_odds_chosen": 0.812049150466919, "log_odds_ratio": -0.6215989589691162, "logits/chosen": 1.1440733671188354, "logits/rejected": 1.069758653640747, "logps/chosen": -2.0186080932617188, "logps/rejected": -2.6671066284179688, "loss": 0.5387, "nll_loss": 0.4765894114971161, "rewards/accuracies": 0.875, "rewards/chosen": -0.20186081528663635, "rewards/margins": 0.0648498609662056, "rewards/rejected": -0.26671069860458374, "step": 5325 }, { "epoch": 14.581793292265571, "grad_norm": 4.112191677093506, "learning_rate": 2.704109589041096e-07, "log_odds_chosen": 2.6074936389923096, "log_odds_ratio": -0.12661801278591156, "logits/chosen": 1.3093347549438477, "logits/rejected": 1.3739266395568848, "logps/chosen": -2.0764784812927246, "logps/rejected": -4.531137466430664, "loss": 0.5473, "nll_loss": 0.5346335172653198, "rewards/accuracies": 1.0, "rewards/chosen": -0.20764783024787903, "rewards/margins": 0.24546591937541962, "rewards/rejected": -0.4531137943267822, "step": 5326 }, { "epoch": 14.584531143052704, "grad_norm": 5.209361553192139, "learning_rate": 2.702739726027397e-07, "log_odds_chosen": 1.6910820007324219, "log_odds_ratio": -0.30748915672302246, "logits/chosen": 1.0054465532302856, "logits/rejected": 1.0710537433624268, "logps/chosen": -1.8091907501220703, "logps/rejected": -3.2201404571533203, "loss": 0.5897, "nll_loss": 0.5589260458946228, "rewards/accuracies": 0.875, "rewards/chosen": -0.1809190958738327, "rewards/margins": 0.14109495282173157, "rewards/rejected": -0.32201406359672546, "step": 5327 }, { "epoch": 14.587268993839835, "grad_norm": 3.0160512924194336, "learning_rate": 2.701369863013698e-07, "log_odds_chosen": 4.714544296264648, "log_odds_ratio": -0.051455557346343994, "logits/chosen": 0.9525809288024902, "logits/rejected": 1.002021312713623, "logps/chosen": -1.7673982381820679, "logps/rejected": -6.232209205627441, "loss": 0.6191, "nll_loss": 0.6139920949935913, "rewards/accuracies": 1.0, "rewards/chosen": -0.17673984169960022, "rewards/margins": 0.44648104906082153, "rewards/rejected": -0.6232209205627441, "step": 5328 }, { "epoch": 14.590006844626968, "grad_norm": 3.5300180912017822, "learning_rate": 2.7e-07, "log_odds_chosen": 5.521420955657959, "log_odds_ratio": -0.1374823898077011, "logits/chosen": 1.1405948400497437, "logits/rejected": 1.106876015663147, "logps/chosen": -1.487827181816101, "logps/rejected": -6.714317321777344, "loss": 0.5095, "nll_loss": 0.49570930004119873, "rewards/accuracies": 1.0, "rewards/chosen": -0.14878271520137787, "rewards/margins": 0.5226490497589111, "rewards/rejected": -0.6714317798614502, "step": 5329 }, { "epoch": 14.5927446954141, "grad_norm": 9.557032585144043, "learning_rate": 2.698630136986301e-07, "log_odds_chosen": 3.3115134239196777, "log_odds_ratio": -0.26014482975006104, "logits/chosen": 1.1532481908798218, "logits/rejected": 1.086495041847229, "logps/chosen": -1.655282735824585, "logps/rejected": -4.7367963790893555, "loss": 0.496, "nll_loss": 0.4699450135231018, "rewards/accuracies": 0.875, "rewards/chosen": -0.1655282825231552, "rewards/margins": 0.3081513047218323, "rewards/rejected": -0.4736796021461487, "step": 5330 }, { "epoch": 14.595482546201232, "grad_norm": 3.2058138847351074, "learning_rate": 2.6972602739726026e-07, "log_odds_chosen": 2.2485594749450684, "log_odds_ratio": -0.18580292165279388, "logits/chosen": 1.124206781387329, "logits/rejected": 1.1363916397094727, "logps/chosen": -1.2315819263458252, "logps/rejected": -3.0839672088623047, "loss": 0.3905, "nll_loss": 0.37190043926239014, "rewards/accuracies": 1.0, "rewards/chosen": -0.12315819412469864, "rewards/margins": 0.1852385252714157, "rewards/rejected": -0.30839675664901733, "step": 5331 }, { "epoch": 14.598220396988363, "grad_norm": 4.02539587020874, "learning_rate": 2.695890410958904e-07, "log_odds_chosen": 2.74568247795105, "log_odds_ratio": -0.13758137822151184, "logits/chosen": 1.2338788509368896, "logits/rejected": 1.2622214555740356, "logps/chosen": -2.6791329383850098, "logps/rejected": -5.217548847198486, "loss": 0.6319, "nll_loss": 0.618177592754364, "rewards/accuracies": 1.0, "rewards/chosen": -0.2679133117198944, "rewards/margins": 0.2538415789604187, "rewards/rejected": -0.5217548608779907, "step": 5332 }, { "epoch": 14.600958247775496, "grad_norm": 3.6301803588867188, "learning_rate": 2.6945205479452056e-07, "log_odds_chosen": 1.6511656045913696, "log_odds_ratio": -0.24006515741348267, "logits/chosen": 1.3780913352966309, "logits/rejected": 1.1927441358566284, "logps/chosen": -1.3378286361694336, "logps/rejected": -2.7310900688171387, "loss": 0.4571, "nll_loss": 0.4331362247467041, "rewards/accuracies": 1.0, "rewards/chosen": -0.13378287851810455, "rewards/margins": 0.13932612538337708, "rewards/rejected": -0.27310898900032043, "step": 5333 }, { "epoch": 14.603696098562628, "grad_norm": 3.6135644912719727, "learning_rate": 2.6931506849315066e-07, "log_odds_chosen": 2.4450340270996094, "log_odds_ratio": -0.19151723384857178, "logits/chosen": 1.1908020973205566, "logits/rejected": 1.2422702312469482, "logps/chosen": -1.8238720893859863, "logps/rejected": -4.102408409118652, "loss": 0.4858, "nll_loss": 0.4666507840156555, "rewards/accuracies": 1.0, "rewards/chosen": -0.18238721787929535, "rewards/margins": 0.2278536558151245, "rewards/rejected": -0.4102408289909363, "step": 5334 }, { "epoch": 14.60643394934976, "grad_norm": 3.0869932174682617, "learning_rate": 2.6917808219178076e-07, "log_odds_chosen": 3.6859164237976074, "log_odds_ratio": -0.1403319537639618, "logits/chosen": 0.9976792335510254, "logits/rejected": 0.9754903316497803, "logps/chosen": -1.8578400611877441, "logps/rejected": -5.3601603507995605, "loss": 0.5292, "nll_loss": 0.5151393413543701, "rewards/accuracies": 1.0, "rewards/chosen": -0.1857840120792389, "rewards/margins": 0.35023200511932373, "rewards/rejected": -0.536016047000885, "step": 5335 }, { "epoch": 14.609171800136892, "grad_norm": 3.3718063831329346, "learning_rate": 2.6904109589041097e-07, "log_odds_chosen": 2.978898763656616, "log_odds_ratio": -0.13435997068881989, "logits/chosen": 1.0705095529556274, "logits/rejected": 1.0127166509628296, "logps/chosen": -1.3652819395065308, "logps/rejected": -4.034669876098633, "loss": 0.3946, "nll_loss": 0.3811975121498108, "rewards/accuracies": 1.0, "rewards/chosen": -0.13652819395065308, "rewards/margins": 0.26693880558013916, "rewards/rejected": -0.40346699953079224, "step": 5336 }, { "epoch": 14.611909650924025, "grad_norm": 3.790499687194824, "learning_rate": 2.6890410958904107e-07, "log_odds_chosen": 2.569988965988159, "log_odds_ratio": -0.24208292365074158, "logits/chosen": 1.0829544067382812, "logits/rejected": 1.0617189407348633, "logps/chosen": -1.7443373203277588, "logps/rejected": -4.141219139099121, "loss": 0.5281, "nll_loss": 0.5039393305778503, "rewards/accuracies": 1.0, "rewards/chosen": -0.17443373799324036, "rewards/margins": 0.2396882027387619, "rewards/rejected": -0.41412192583084106, "step": 5337 }, { "epoch": 14.614647501711158, "grad_norm": 5.260804653167725, "learning_rate": 2.687671232876712e-07, "log_odds_chosen": 3.190001964569092, "log_odds_ratio": -0.20649752020835876, "logits/chosen": 1.2210733890533447, "logits/rejected": 1.235742449760437, "logps/chosen": -2.516906499862671, "logps/rejected": -5.571298122406006, "loss": 0.6469, "nll_loss": 0.6262906789779663, "rewards/accuracies": 0.875, "rewards/chosen": -0.25169065594673157, "rewards/margins": 0.30543917417526245, "rewards/rejected": -0.5571298003196716, "step": 5338 }, { "epoch": 14.617385352498289, "grad_norm": 4.5828351974487305, "learning_rate": 2.6863013698630137e-07, "log_odds_chosen": 1.557551622390747, "log_odds_ratio": -0.29137304425239563, "logits/chosen": 1.1394801139831543, "logits/rejected": 1.1449404954910278, "logps/chosen": -1.6272387504577637, "logps/rejected": -2.9017133712768555, "loss": 0.5066, "nll_loss": 0.47741377353668213, "rewards/accuracies": 1.0, "rewards/chosen": -0.1627238690853119, "rewards/margins": 0.1274474859237671, "rewards/rejected": -0.290171355009079, "step": 5339 }, { "epoch": 14.62012320328542, "grad_norm": 6.371475696563721, "learning_rate": 2.684931506849315e-07, "log_odds_chosen": 2.644613027572632, "log_odds_ratio": -0.41519248485565186, "logits/chosen": 0.9083412885665894, "logits/rejected": 0.8827738761901855, "logps/chosen": -2.2451696395874023, "logps/rejected": -4.7229156494140625, "loss": 0.6127, "nll_loss": 0.5712301135063171, "rewards/accuracies": 0.875, "rewards/chosen": -0.22451695799827576, "rewards/margins": 0.2477746307849884, "rewards/rejected": -0.47229158878326416, "step": 5340 }, { "epoch": 14.622861054072553, "grad_norm": 4.387980937957764, "learning_rate": 2.683561643835616e-07, "log_odds_chosen": 4.589967250823975, "log_odds_ratio": -0.3292458951473236, "logits/chosen": 1.25235915184021, "logits/rejected": 1.2913066148757935, "logps/chosen": -2.6528453826904297, "logps/rejected": -7.220776081085205, "loss": 0.6456, "nll_loss": 0.612683892250061, "rewards/accuracies": 0.75, "rewards/chosen": -0.26528453826904297, "rewards/margins": 0.45679301023483276, "rewards/rejected": -0.7220776081085205, "step": 5341 }, { "epoch": 14.625598904859686, "grad_norm": 5.293481349945068, "learning_rate": 2.682191780821917e-07, "log_odds_chosen": 3.142491579055786, "log_odds_ratio": -0.13942672312259674, "logits/chosen": 1.1728839874267578, "logits/rejected": 1.0566797256469727, "logps/chosen": -1.737139105796814, "logps/rejected": -4.679698467254639, "loss": 0.5648, "nll_loss": 0.5508269667625427, "rewards/accuracies": 1.0, "rewards/chosen": -0.17371390759944916, "rewards/margins": 0.29425591230392456, "rewards/rejected": -0.4679698050022125, "step": 5342 }, { "epoch": 14.628336755646817, "grad_norm": 4.018857479095459, "learning_rate": 2.680821917808219e-07, "log_odds_chosen": 2.86836576461792, "log_odds_ratio": -0.15870296955108643, "logits/chosen": 1.2698649168014526, "logits/rejected": 1.2899881601333618, "logps/chosen": -2.0283358097076416, "logps/rejected": -4.7730302810668945, "loss": 0.6559, "nll_loss": 0.6399800777435303, "rewards/accuracies": 1.0, "rewards/chosen": -0.20283357799053192, "rewards/margins": 0.2744694650173187, "rewards/rejected": -0.47730302810668945, "step": 5343 }, { "epoch": 14.63107460643395, "grad_norm": 4.0693230628967285, "learning_rate": 2.67945205479452e-07, "log_odds_chosen": 2.097026824951172, "log_odds_ratio": -0.3897138833999634, "logits/chosen": 1.1348440647125244, "logits/rejected": 1.1732261180877686, "logps/chosen": -1.754852294921875, "logps/rejected": -3.4008073806762695, "loss": 0.4583, "nll_loss": 0.4193565249443054, "rewards/accuracies": 0.875, "rewards/chosen": -0.17548523843288422, "rewards/margins": 0.16459548473358154, "rewards/rejected": -0.34008070826530457, "step": 5344 }, { "epoch": 14.633812457221081, "grad_norm": 5.045656204223633, "learning_rate": 2.678082191780822e-07, "log_odds_chosen": 3.398604393005371, "log_odds_ratio": -0.11020351201295853, "logits/chosen": 1.085878849029541, "logits/rejected": 1.0997490882873535, "logps/chosen": -2.1843245029449463, "logps/rejected": -5.454843521118164, "loss": 0.6698, "nll_loss": 0.6587533354759216, "rewards/accuracies": 1.0, "rewards/chosen": -0.2184324413537979, "rewards/margins": 0.32705190777778625, "rewards/rejected": -0.5454843044281006, "step": 5345 }, { "epoch": 14.636550308008214, "grad_norm": 4.386533737182617, "learning_rate": 2.6767123287671233e-07, "log_odds_chosen": 6.0680131912231445, "log_odds_ratio": -0.055416129529476166, "logits/chosen": 1.1412407159805298, "logits/rejected": 1.1915491819381714, "logps/chosen": -1.9930055141448975, "logps/rejected": -7.865301132202148, "loss": 0.6613, "nll_loss": 0.6557227969169617, "rewards/accuracies": 1.0, "rewards/chosen": -0.19930055737495422, "rewards/margins": 0.5872296094894409, "rewards/rejected": -0.7865301370620728, "step": 5346 }, { "epoch": 14.639288158795345, "grad_norm": 6.833559513092041, "learning_rate": 2.675342465753425e-07, "log_odds_chosen": 1.6627386808395386, "log_odds_ratio": -0.3386061191558838, "logits/chosen": 1.3944419622421265, "logits/rejected": 1.4108967781066895, "logps/chosen": -1.908043622970581, "logps/rejected": -3.402646064758301, "loss": 0.5328, "nll_loss": 0.4989830255508423, "rewards/accuracies": 0.875, "rewards/chosen": -0.1908043622970581, "rewards/margins": 0.14946024119853973, "rewards/rejected": -0.34026461839675903, "step": 5347 }, { "epoch": 14.642026009582478, "grad_norm": 4.013547897338867, "learning_rate": 2.673972602739726e-07, "log_odds_chosen": 3.123081684112549, "log_odds_ratio": -0.18354934453964233, "logits/chosen": 1.1856985092163086, "logits/rejected": 1.1551682949066162, "logps/chosen": -1.9519257545471191, "logps/rejected": -4.898918151855469, "loss": 0.5468, "nll_loss": 0.5284602642059326, "rewards/accuracies": 1.0, "rewards/chosen": -0.1951925903558731, "rewards/margins": 0.29469919204711914, "rewards/rejected": -0.48989179730415344, "step": 5348 }, { "epoch": 14.64476386036961, "grad_norm": 4.551265239715576, "learning_rate": 2.672602739726027e-07, "log_odds_chosen": 1.2290716171264648, "log_odds_ratio": -0.3475983738899231, "logits/chosen": 1.0773414373397827, "logits/rejected": 1.0246686935424805, "logps/chosen": -2.1990675926208496, "logps/rejected": -3.2936739921569824, "loss": 0.5272, "nll_loss": 0.49247097969055176, "rewards/accuracies": 0.875, "rewards/chosen": -0.2199067771434784, "rewards/margins": 0.10946062207221985, "rewards/rejected": -0.32936739921569824, "step": 5349 }, { "epoch": 14.647501711156742, "grad_norm": 3.2122530937194824, "learning_rate": 2.671232876712329e-07, "log_odds_chosen": 3.9565391540527344, "log_odds_ratio": -0.07007334381341934, "logits/chosen": 1.0264531373977661, "logits/rejected": 0.9645029306411743, "logps/chosen": -1.780956745147705, "logps/rejected": -5.55076265335083, "loss": 0.4744, "nll_loss": 0.4673587679862976, "rewards/accuracies": 1.0, "rewards/chosen": -0.17809566855430603, "rewards/margins": 0.37698063254356384, "rewards/rejected": -0.5550763010978699, "step": 5350 }, { "epoch": 14.650239561943874, "grad_norm": 3.621070384979248, "learning_rate": 2.66986301369863e-07, "log_odds_chosen": 3.083219289779663, "log_odds_ratio": -0.14964011311531067, "logits/chosen": 0.9756563901901245, "logits/rejected": 0.9686468243598938, "logps/chosen": -2.080195426940918, "logps/rejected": -5.019535541534424, "loss": 0.586, "nll_loss": 0.5710527896881104, "rewards/accuracies": 1.0, "rewards/chosen": -0.20801955461502075, "rewards/margins": 0.2939339876174927, "rewards/rejected": -0.5019535422325134, "step": 5351 }, { "epoch": 14.652977412731007, "grad_norm": 3.5786962509155273, "learning_rate": 2.6684931506849314e-07, "log_odds_chosen": 1.515960931777954, "log_odds_ratio": -0.2893907129764557, "logits/chosen": 1.0649625062942505, "logits/rejected": 1.0843641757965088, "logps/chosen": -2.1429641246795654, "logps/rejected": -3.504514455795288, "loss": 0.537, "nll_loss": 0.5080571174621582, "rewards/accuracies": 1.0, "rewards/chosen": -0.21429643034934998, "rewards/margins": 0.13615500926971436, "rewards/rejected": -0.35045143961906433, "step": 5352 }, { "epoch": 14.655715263518138, "grad_norm": 4.361326217651367, "learning_rate": 2.667123287671233e-07, "log_odds_chosen": 1.2447218894958496, "log_odds_ratio": -0.4330770969390869, "logits/chosen": 0.9090073108673096, "logits/rejected": 0.8282875418663025, "logps/chosen": -2.0451459884643555, "logps/rejected": -3.1776843070983887, "loss": 0.6026, "nll_loss": 0.5593121647834778, "rewards/accuracies": 0.875, "rewards/chosen": -0.20451459288597107, "rewards/margins": 0.11325383186340332, "rewards/rejected": -0.3177684545516968, "step": 5353 }, { "epoch": 14.65845311430527, "grad_norm": 8.104764938354492, "learning_rate": 2.6657534246575344e-07, "log_odds_chosen": 2.8329100608825684, "log_odds_ratio": -0.41110759973526, "logits/chosen": 0.8905765414237976, "logits/rejected": 0.8712072372436523, "logps/chosen": -2.4402284622192383, "logps/rejected": -5.117758274078369, "loss": 0.7254, "nll_loss": 0.6842538118362427, "rewards/accuracies": 0.875, "rewards/chosen": -0.24402287602424622, "rewards/margins": 0.26775291562080383, "rewards/rejected": -0.5117758512496948, "step": 5354 }, { "epoch": 14.661190965092402, "grad_norm": 3.7886531352996826, "learning_rate": 2.6643835616438354e-07, "log_odds_chosen": 3.404269218444824, "log_odds_ratio": -0.2699764370918274, "logits/chosen": 1.1084928512573242, "logits/rejected": 1.0872825384140015, "logps/chosen": -1.1542710065841675, "logps/rejected": -4.1142354011535645, "loss": 0.4766, "nll_loss": 0.4495747685432434, "rewards/accuracies": 0.875, "rewards/chosen": -0.11542710661888123, "rewards/margins": 0.29599645733833313, "rewards/rejected": -0.41142356395721436, "step": 5355 }, { "epoch": 14.663928815879535, "grad_norm": 4.19784688949585, "learning_rate": 2.663013698630137e-07, "log_odds_chosen": 1.8076279163360596, "log_odds_ratio": -0.2393856644630432, "logits/chosen": 0.8031997680664062, "logits/rejected": 0.7020329236984253, "logps/chosen": -1.8849115371704102, "logps/rejected": -3.5149588584899902, "loss": 0.5146, "nll_loss": 0.4906953275203705, "rewards/accuracies": 1.0, "rewards/chosen": -0.18849113583564758, "rewards/margins": 0.16300474107265472, "rewards/rejected": -0.3514958620071411, "step": 5356 }, { "epoch": 14.666666666666666, "grad_norm": 3.7913825511932373, "learning_rate": 2.6616438356164384e-07, "log_odds_chosen": 3.7121987342834473, "log_odds_ratio": -0.17507073283195496, "logits/chosen": 1.214118480682373, "logits/rejected": 1.2299790382385254, "logps/chosen": -2.6376123428344727, "logps/rejected": -6.261396408081055, "loss": 0.6052, "nll_loss": 0.587725043296814, "rewards/accuracies": 1.0, "rewards/chosen": -0.2637612223625183, "rewards/margins": 0.36237841844558716, "rewards/rejected": -0.6261396408081055, "step": 5357 }, { "epoch": 14.669404517453799, "grad_norm": 3.974532127380371, "learning_rate": 2.6602739726027394e-07, "log_odds_chosen": 2.324733018875122, "log_odds_ratio": -0.27983611822128296, "logits/chosen": 1.014972448348999, "logits/rejected": 0.9283807277679443, "logps/chosen": -2.1742589473724365, "logps/rejected": -4.355434417724609, "loss": 0.4739, "nll_loss": 0.44592320919036865, "rewards/accuracies": 1.0, "rewards/chosen": -0.21742591261863708, "rewards/margins": 0.21811756491661072, "rewards/rejected": -0.4355434775352478, "step": 5358 }, { "epoch": 14.67214236824093, "grad_norm": 3.2691688537597656, "learning_rate": 2.658904109589041e-07, "log_odds_chosen": 3.614140748977661, "log_odds_ratio": -0.21637088060379028, "logits/chosen": 0.9045782089233398, "logits/rejected": 0.8796103000640869, "logps/chosen": -2.134199619293213, "logps/rejected": -5.637224197387695, "loss": 0.6148, "nll_loss": 0.5931448936462402, "rewards/accuracies": 0.875, "rewards/chosen": -0.21341995894908905, "rewards/margins": 0.35030245780944824, "rewards/rejected": -0.5637224912643433, "step": 5359 }, { "epoch": 14.674880219028063, "grad_norm": 3.6615569591522217, "learning_rate": 2.6575342465753425e-07, "log_odds_chosen": 3.1449124813079834, "log_odds_ratio": -0.19975973665714264, "logits/chosen": 0.9793164134025574, "logits/rejected": 0.9462014436721802, "logps/chosen": -1.7107501029968262, "logps/rejected": -4.67209529876709, "loss": 0.5452, "nll_loss": 0.5252077579498291, "rewards/accuracies": 1.0, "rewards/chosen": -0.1710750162601471, "rewards/margins": 0.29613447189331055, "rewards/rejected": -0.46720951795578003, "step": 5360 }, { "epoch": 14.677618069815194, "grad_norm": 6.804051399230957, "learning_rate": 2.656164383561644e-07, "log_odds_chosen": 3.8117666244506836, "log_odds_ratio": -0.42734646797180176, "logits/chosen": 0.9993507266044617, "logits/rejected": 0.9884271025657654, "logps/chosen": -2.178375720977783, "logps/rejected": -5.86088752746582, "loss": 0.5892, "nll_loss": 0.5464497804641724, "rewards/accuracies": 0.875, "rewards/chosen": -0.21783755719661713, "rewards/margins": 0.3682512044906616, "rewards/rejected": -0.5860887765884399, "step": 5361 }, { "epoch": 14.680355920602327, "grad_norm": 6.391469955444336, "learning_rate": 2.654794520547945e-07, "log_odds_chosen": 2.192887544631958, "log_odds_ratio": -0.22833997011184692, "logits/chosen": 0.9008034467697144, "logits/rejected": 0.8694332838058472, "logps/chosen": -2.1975886821746826, "logps/rejected": -4.19326114654541, "loss": 0.5116, "nll_loss": 0.48877274990081787, "rewards/accuracies": 1.0, "rewards/chosen": -0.21975886821746826, "rewards/margins": 0.1995673030614853, "rewards/rejected": -0.41932618618011475, "step": 5362 }, { "epoch": 14.683093771389458, "grad_norm": 3.4936604499816895, "learning_rate": 2.6534246575342465e-07, "log_odds_chosen": 1.9409868717193604, "log_odds_ratio": -0.21112918853759766, "logits/chosen": 1.233628511428833, "logits/rejected": 1.238694190979004, "logps/chosen": -1.6278269290924072, "logps/rejected": -3.3850388526916504, "loss": 0.4492, "nll_loss": 0.4281345307826996, "rewards/accuracies": 1.0, "rewards/chosen": -0.1627826988697052, "rewards/margins": 0.1757211834192276, "rewards/rejected": -0.3385038673877716, "step": 5363 }, { "epoch": 14.685831622176591, "grad_norm": 3.419318437576294, "learning_rate": 2.652054794520548e-07, "log_odds_chosen": 3.1674139499664307, "log_odds_ratio": -0.23252257704734802, "logits/chosen": 1.3395441770553589, "logits/rejected": 1.2584211826324463, "logps/chosen": -1.938565969467163, "logps/rejected": -4.959021091461182, "loss": 0.5117, "nll_loss": 0.48848065733909607, "rewards/accuracies": 1.0, "rewards/chosen": -0.1938565969467163, "rewards/margins": 0.3020455539226532, "rewards/rejected": -0.4959021508693695, "step": 5364 }, { "epoch": 14.688569472963724, "grad_norm": 3.7700355052948, "learning_rate": 2.650684931506849e-07, "log_odds_chosen": 3.3337361812591553, "log_odds_ratio": -0.47413337230682373, "logits/chosen": 0.8966022729873657, "logits/rejected": 0.9498257637023926, "logps/chosen": -1.6715270280838013, "logps/rejected": -4.657966136932373, "loss": 0.609, "nll_loss": 0.5616225004196167, "rewards/accuracies": 0.75, "rewards/chosen": -0.16715271770954132, "rewards/margins": 0.29864388704299927, "rewards/rejected": -0.4657966196537018, "step": 5365 }, { "epoch": 14.691307323750856, "grad_norm": 3.815786123275757, "learning_rate": 2.6493150684931505e-07, "log_odds_chosen": 3.124922037124634, "log_odds_ratio": -0.13182726502418518, "logits/chosen": 0.9043580293655396, "logits/rejected": 0.830263614654541, "logps/chosen": -1.7289369106292725, "logps/rejected": -4.650190353393555, "loss": 0.5042, "nll_loss": 0.491016149520874, "rewards/accuracies": 1.0, "rewards/chosen": -0.1728937029838562, "rewards/margins": 0.2921253740787506, "rewards/rejected": -0.4650190472602844, "step": 5366 }, { "epoch": 14.694045174537987, "grad_norm": 3.4162518978118896, "learning_rate": 2.647945205479452e-07, "log_odds_chosen": 2.8770406246185303, "log_odds_ratio": -0.23559701442718506, "logits/chosen": 1.2437090873718262, "logits/rejected": 1.2602806091308594, "logps/chosen": -1.5932708978652954, "logps/rejected": -4.249634265899658, "loss": 0.5109, "nll_loss": 0.4873409569263458, "rewards/accuracies": 1.0, "rewards/chosen": -0.15932708978652954, "rewards/margins": 0.2656363248825073, "rewards/rejected": -0.42496341466903687, "step": 5367 }, { "epoch": 14.69678302532512, "grad_norm": 3.1753039360046387, "learning_rate": 2.6465753424657536e-07, "log_odds_chosen": 3.4312400817871094, "log_odds_ratio": -0.14948861300945282, "logits/chosen": 1.0651171207427979, "logits/rejected": 1.0875142812728882, "logps/chosen": -1.7487621307373047, "logps/rejected": -4.997287273406982, "loss": 0.532, "nll_loss": 0.5170283913612366, "rewards/accuracies": 0.875, "rewards/chosen": -0.17487622797489166, "rewards/margins": 0.32485249638557434, "rewards/rejected": -0.4997287690639496, "step": 5368 }, { "epoch": 14.699520876112253, "grad_norm": 6.732349872589111, "learning_rate": 2.6452054794520546e-07, "log_odds_chosen": 1.5699163675308228, "log_odds_ratio": -0.32146692276000977, "logits/chosen": 1.1927976608276367, "logits/rejected": 1.252245306968689, "logps/chosen": -2.5227317810058594, "logps/rejected": -4.027474403381348, "loss": 0.617, "nll_loss": 0.5848535299301147, "rewards/accuracies": 0.875, "rewards/chosen": -0.25227317214012146, "rewards/margins": 0.15047428011894226, "rewards/rejected": -0.4027474522590637, "step": 5369 }, { "epoch": 14.702258726899384, "grad_norm": 3.281494379043579, "learning_rate": 2.643835616438356e-07, "log_odds_chosen": 4.337364673614502, "log_odds_ratio": -0.14773182570934296, "logits/chosen": 1.387979507446289, "logits/rejected": 1.4386930465698242, "logps/chosen": -1.9305800199508667, "logps/rejected": -6.074865818023682, "loss": 0.511, "nll_loss": 0.49627187848091125, "rewards/accuracies": 1.0, "rewards/chosen": -0.19305799901485443, "rewards/margins": 0.41442862153053284, "rewards/rejected": -0.6074866056442261, "step": 5370 }, { "epoch": 14.704996577686517, "grad_norm": 3.8049721717834473, "learning_rate": 2.6424657534246576e-07, "log_odds_chosen": 2.1624438762664795, "log_odds_ratio": -0.19412937760353088, "logits/chosen": 1.1466164588928223, "logits/rejected": 1.0993614196777344, "logps/chosen": -2.2152647972106934, "logps/rejected": -4.229605674743652, "loss": 0.5564, "nll_loss": 0.5370043516159058, "rewards/accuracies": 1.0, "rewards/chosen": -0.22152647376060486, "rewards/margins": 0.20143410563468933, "rewards/rejected": -0.4229605793952942, "step": 5371 }, { "epoch": 14.707734428473648, "grad_norm": 3.4876132011413574, "learning_rate": 2.6410958904109586e-07, "log_odds_chosen": 3.64804744720459, "log_odds_ratio": -0.12246756255626678, "logits/chosen": 0.9060957431793213, "logits/rejected": 0.8208534121513367, "logps/chosen": -1.5141894817352295, "logps/rejected": -4.840931415557861, "loss": 0.5173, "nll_loss": 0.5050592422485352, "rewards/accuracies": 1.0, "rewards/chosen": -0.15141895413398743, "rewards/margins": 0.33267420530319214, "rewards/rejected": -0.4840931296348572, "step": 5372 }, { "epoch": 14.710472279260781, "grad_norm": 3.867215394973755, "learning_rate": 2.63972602739726e-07, "log_odds_chosen": 2.2078053951263428, "log_odds_ratio": -0.21725930273532867, "logits/chosen": 1.292952299118042, "logits/rejected": 1.2818987369537354, "logps/chosen": -1.8058435916900635, "logps/rejected": -3.8306546211242676, "loss": 0.4835, "nll_loss": 0.4617832899093628, "rewards/accuracies": 1.0, "rewards/chosen": -0.1805843710899353, "rewards/margins": 0.20248109102249146, "rewards/rejected": -0.38306546211242676, "step": 5373 }, { "epoch": 14.713210130047912, "grad_norm": 3.9537863731384277, "learning_rate": 2.6383561643835616e-07, "log_odds_chosen": 3.2422499656677246, "log_odds_ratio": -0.2361091524362564, "logits/chosen": 0.7968977689743042, "logits/rejected": 0.8396026492118835, "logps/chosen": -2.2906131744384766, "logps/rejected": -5.381964206695557, "loss": 0.6515, "nll_loss": 0.6278695464134216, "rewards/accuracies": 0.875, "rewards/chosen": -0.2290613055229187, "rewards/margins": 0.3091351389884949, "rewards/rejected": -0.5381963849067688, "step": 5374 }, { "epoch": 14.715947980835045, "grad_norm": 3.5890493392944336, "learning_rate": 2.6369863013698626e-07, "log_odds_chosen": 3.6391279697418213, "log_odds_ratio": -0.15083514153957367, "logits/chosen": 1.2307084798812866, "logits/rejected": 1.2119669914245605, "logps/chosen": -1.8810808658599854, "logps/rejected": -5.365167617797852, "loss": 0.5424, "nll_loss": 0.5272712707519531, "rewards/accuracies": 1.0, "rewards/chosen": -0.18810808658599854, "rewards/margins": 0.34840863943099976, "rewards/rejected": -0.5365167260169983, "step": 5375 }, { "epoch": 14.718685831622176, "grad_norm": 3.998687505722046, "learning_rate": 2.635616438356164e-07, "log_odds_chosen": 2.7663118839263916, "log_odds_ratio": -0.2213706076145172, "logits/chosen": 0.9599074125289917, "logits/rejected": 0.892659068107605, "logps/chosen": -1.8336511850357056, "logps/rejected": -4.418680191040039, "loss": 0.5637, "nll_loss": 0.54151850938797, "rewards/accuracies": 1.0, "rewards/chosen": -0.1833651065826416, "rewards/margins": 0.25850290060043335, "rewards/rejected": -0.44186803698539734, "step": 5376 }, { "epoch": 14.72142368240931, "grad_norm": 3.753528594970703, "learning_rate": 2.6342465753424657e-07, "log_odds_chosen": 3.2731070518493652, "log_odds_ratio": -0.19674052298069, "logits/chosen": 0.7939510941505432, "logits/rejected": 0.7662781476974487, "logps/chosen": -1.7354559898376465, "logps/rejected": -4.758962631225586, "loss": 0.5426, "nll_loss": 0.522909939289093, "rewards/accuracies": 1.0, "rewards/chosen": -0.17354561388492584, "rewards/margins": 0.3023506999015808, "rewards/rejected": -0.47589629888534546, "step": 5377 }, { "epoch": 14.72416153319644, "grad_norm": 4.194918632507324, "learning_rate": 2.632876712328767e-07, "log_odds_chosen": 2.833852529525757, "log_odds_ratio": -0.25572657585144043, "logits/chosen": 0.8311116695404053, "logits/rejected": 0.8075070381164551, "logps/chosen": -1.5509793758392334, "logps/rejected": -4.142131328582764, "loss": 0.627, "nll_loss": 0.6013914346694946, "rewards/accuracies": 0.875, "rewards/chosen": -0.15509796142578125, "rewards/margins": 0.25911518931388855, "rewards/rejected": -0.4142131209373474, "step": 5378 }, { "epoch": 14.726899383983573, "grad_norm": 6.116226673126221, "learning_rate": 2.631506849315068e-07, "log_odds_chosen": 1.8795084953308105, "log_odds_ratio": -0.3349788784980774, "logits/chosen": 1.0490717887878418, "logits/rejected": 1.0111536979675293, "logps/chosen": -2.1561009883880615, "logps/rejected": -3.807675838470459, "loss": 0.4836, "nll_loss": 0.4500795602798462, "rewards/accuracies": 0.875, "rewards/chosen": -0.2156100869178772, "rewards/margins": 0.1651575118303299, "rewards/rejected": -0.3807675838470459, "step": 5379 }, { "epoch": 14.729637234770705, "grad_norm": 5.624783039093018, "learning_rate": 2.6301369863013697e-07, "log_odds_chosen": 1.4579399824142456, "log_odds_ratio": -0.4776409864425659, "logits/chosen": 1.080235242843628, "logits/rejected": 1.051506519317627, "logps/chosen": -2.263298988342285, "logps/rejected": -3.5757036209106445, "loss": 0.6828, "nll_loss": 0.6350747346878052, "rewards/accuracies": 0.75, "rewards/chosen": -0.22632990777492523, "rewards/margins": 0.13124047219753265, "rewards/rejected": -0.3575703799724579, "step": 5380 }, { "epoch": 14.732375085557837, "grad_norm": 4.195607662200928, "learning_rate": 2.628767123287671e-07, "log_odds_chosen": 3.5780422687530518, "log_odds_ratio": -0.08453281968832016, "logits/chosen": 1.3240025043487549, "logits/rejected": 1.4295275211334229, "logps/chosen": -2.5373265743255615, "logps/rejected": -5.976840972900391, "loss": 0.5924, "nll_loss": 0.583971381187439, "rewards/accuracies": 1.0, "rewards/chosen": -0.25373268127441406, "rewards/margins": 0.3439514636993408, "rewards/rejected": -0.5976841449737549, "step": 5381 }, { "epoch": 14.735112936344969, "grad_norm": 3.7367451190948486, "learning_rate": 2.627397260273972e-07, "log_odds_chosen": 3.092525005340576, "log_odds_ratio": -0.18666517734527588, "logits/chosen": 1.1622846126556396, "logits/rejected": 1.191291332244873, "logps/chosen": -1.857312560081482, "logps/rejected": -4.797291278839111, "loss": 0.5354, "nll_loss": 0.5167656540870667, "rewards/accuracies": 1.0, "rewards/chosen": -0.18573123216629028, "rewards/margins": 0.2939979135990143, "rewards/rejected": -0.47972917556762695, "step": 5382 }, { "epoch": 14.737850787132102, "grad_norm": 3.445676803588867, "learning_rate": 2.626027397260274e-07, "log_odds_chosen": 3.0020012855529785, "log_odds_ratio": -0.18957287073135376, "logits/chosen": 1.0546504259109497, "logits/rejected": 0.9965581893920898, "logps/chosen": -1.7545157670974731, "logps/rejected": -4.5782365798950195, "loss": 0.5738, "nll_loss": 0.5548093318939209, "rewards/accuracies": 1.0, "rewards/chosen": -0.17545157670974731, "rewards/margins": 0.2823720872402191, "rewards/rejected": -0.45782366394996643, "step": 5383 }, { "epoch": 14.740588637919233, "grad_norm": 4.78527307510376, "learning_rate": 2.624657534246575e-07, "log_odds_chosen": 4.115445137023926, "log_odds_ratio": -0.2571268081665039, "logits/chosen": 1.070145606994629, "logits/rejected": 1.093843936920166, "logps/chosen": -2.472691297531128, "logps/rejected": -6.480066299438477, "loss": 0.6685, "nll_loss": 0.6428303718566895, "rewards/accuracies": 0.875, "rewards/chosen": -0.24726912379264832, "rewards/margins": 0.4007375240325928, "rewards/rejected": -0.6480066776275635, "step": 5384 }, { "epoch": 14.743326488706366, "grad_norm": 3.5950889587402344, "learning_rate": 2.623287671232877e-07, "log_odds_chosen": 2.705608606338501, "log_odds_ratio": -0.21890349686145782, "logits/chosen": 1.0769153833389282, "logits/rejected": 1.0467722415924072, "logps/chosen": -1.5969160795211792, "logps/rejected": -4.0724334716796875, "loss": 0.4847, "nll_loss": 0.4628358483314514, "rewards/accuracies": 1.0, "rewards/chosen": -0.15969161689281464, "rewards/margins": 0.24755175411701202, "rewards/rejected": -0.40724337100982666, "step": 5385 }, { "epoch": 14.746064339493497, "grad_norm": 4.5719218254089355, "learning_rate": 2.621917808219178e-07, "log_odds_chosen": 3.38248872756958, "log_odds_ratio": -0.16870556771755219, "logits/chosen": 0.6694366931915283, "logits/rejected": 0.5302658677101135, "logps/chosen": -1.2430055141448975, "logps/rejected": -4.323691368103027, "loss": 0.5439, "nll_loss": 0.5270364284515381, "rewards/accuracies": 1.0, "rewards/chosen": -0.12430056929588318, "rewards/margins": 0.30806857347488403, "rewards/rejected": -0.4323691427707672, "step": 5386 }, { "epoch": 14.74880219028063, "grad_norm": 3.2809503078460693, "learning_rate": 2.6205479452054793e-07, "log_odds_chosen": 2.8304216861724854, "log_odds_ratio": -0.16707761585712433, "logits/chosen": 0.883087158203125, "logits/rejected": 0.821438193321228, "logps/chosen": -1.769606351852417, "logps/rejected": -4.408231735229492, "loss": 0.5602, "nll_loss": 0.5434902906417847, "rewards/accuracies": 0.875, "rewards/chosen": -0.17696063220500946, "rewards/margins": 0.26386260986328125, "rewards/rejected": -0.4408232271671295, "step": 5387 }, { "epoch": 14.751540041067761, "grad_norm": 3.864488124847412, "learning_rate": 2.619178082191781e-07, "log_odds_chosen": 2.4535465240478516, "log_odds_ratio": -0.24845460057258606, "logits/chosen": 0.9750286936759949, "logits/rejected": 0.9420892596244812, "logps/chosen": -1.3700661659240723, "logps/rejected": -3.614130973815918, "loss": 0.4467, "nll_loss": 0.421897292137146, "rewards/accuracies": 1.0, "rewards/chosen": -0.13700661063194275, "rewards/margins": 0.22440646588802338, "rewards/rejected": -0.3614131212234497, "step": 5388 }, { "epoch": 14.754277891854894, "grad_norm": 3.670523166656494, "learning_rate": 2.617808219178082e-07, "log_odds_chosen": 3.826666831970215, "log_odds_ratio": -0.2683132290840149, "logits/chosen": 1.0840034484863281, "logits/rejected": 1.136203408241272, "logps/chosen": -2.4832816123962402, "logps/rejected": -6.2245707511901855, "loss": 0.5725, "nll_loss": 0.5456387996673584, "rewards/accuracies": 0.75, "rewards/chosen": -0.24832819402217865, "rewards/margins": 0.37412890791893005, "rewards/rejected": -0.6224570870399475, "step": 5389 }, { "epoch": 14.757015742642025, "grad_norm": 4.614304542541504, "learning_rate": 2.6164383561643833e-07, "log_odds_chosen": 4.057094573974609, "log_odds_ratio": -0.14600126445293427, "logits/chosen": 0.9097311496734619, "logits/rejected": 0.891035795211792, "logps/chosen": -1.8185746669769287, "logps/rejected": -5.6061906814575195, "loss": 0.5783, "nll_loss": 0.5637195706367493, "rewards/accuracies": 1.0, "rewards/chosen": -0.18185746669769287, "rewards/margins": 0.37876155972480774, "rewards/rejected": -0.5606189966201782, "step": 5390 }, { "epoch": 14.759753593429158, "grad_norm": 3.376136541366577, "learning_rate": 2.615068493150685e-07, "log_odds_chosen": 4.096271514892578, "log_odds_ratio": -0.09771168231964111, "logits/chosen": 1.1627898216247559, "logits/rejected": 1.2176716327667236, "logps/chosen": -1.6796222925186157, "logps/rejected": -5.557297706604004, "loss": 0.6031, "nll_loss": 0.5933135747909546, "rewards/accuracies": 1.0, "rewards/chosen": -0.1679622381925583, "rewards/margins": 0.3877675235271454, "rewards/rejected": -0.5557297468185425, "step": 5391 }, { "epoch": 14.762491444216291, "grad_norm": 3.8517444133758545, "learning_rate": 2.6136986301369864e-07, "log_odds_chosen": 2.2851219177246094, "log_odds_ratio": -0.23616188764572144, "logits/chosen": 1.2121829986572266, "logits/rejected": 1.1849048137664795, "logps/chosen": -1.5608423948287964, "logps/rejected": -3.5740818977355957, "loss": 0.5171, "nll_loss": 0.49347347021102905, "rewards/accuracies": 1.0, "rewards/chosen": -0.15608423948287964, "rewards/margins": 0.20132394134998322, "rewards/rejected": -0.35740819573402405, "step": 5392 }, { "epoch": 14.765229295003422, "grad_norm": 3.938049077987671, "learning_rate": 2.6123287671232874e-07, "log_odds_chosen": 1.8549492359161377, "log_odds_ratio": -0.27806568145751953, "logits/chosen": 1.3611981868743896, "logits/rejected": 1.371812105178833, "logps/chosen": -1.8312536478042603, "logps/rejected": -3.3974390029907227, "loss": 0.5026, "nll_loss": 0.4747779965400696, "rewards/accuracies": 1.0, "rewards/chosen": -0.18312537670135498, "rewards/margins": 0.15661850571632385, "rewards/rejected": -0.33974388241767883, "step": 5393 }, { "epoch": 14.767967145790553, "grad_norm": 5.404651165008545, "learning_rate": 2.6109589041095894e-07, "log_odds_chosen": 4.348184108734131, "log_odds_ratio": -0.1960209608078003, "logits/chosen": 1.0518274307250977, "logits/rejected": 1.066260576248169, "logps/chosen": -2.2637224197387695, "logps/rejected": -6.410399436950684, "loss": 0.6697, "nll_loss": 0.6501476764678955, "rewards/accuracies": 1.0, "rewards/chosen": -0.22637224197387695, "rewards/margins": 0.41466766595840454, "rewards/rejected": -0.6410399079322815, "step": 5394 }, { "epoch": 14.770704996577686, "grad_norm": 3.660050868988037, "learning_rate": 2.6095890410958904e-07, "log_odds_chosen": 3.229621648788452, "log_odds_ratio": -0.21405825018882751, "logits/chosen": 1.0771726369857788, "logits/rejected": 1.0717718601226807, "logps/chosen": -1.7406628131866455, "logps/rejected": -4.797065258026123, "loss": 0.5152, "nll_loss": 0.4938325881958008, "rewards/accuracies": 1.0, "rewards/chosen": -0.17406627535820007, "rewards/margins": 0.30564025044441223, "rewards/rejected": -0.4797064960002899, "step": 5395 }, { "epoch": 14.77344284736482, "grad_norm": 3.626615047454834, "learning_rate": 2.6082191780821914e-07, "log_odds_chosen": 2.801712989807129, "log_odds_ratio": -0.2237912267446518, "logits/chosen": 0.7003433704376221, "logits/rejected": 0.668189287185669, "logps/chosen": -2.211634635925293, "logps/rejected": -4.827304840087891, "loss": 0.5377, "nll_loss": 0.5152865052223206, "rewards/accuracies": 0.875, "rewards/chosen": -0.22116348147392273, "rewards/margins": 0.26156699657440186, "rewards/rejected": -0.4827304780483246, "step": 5396 }, { "epoch": 14.77618069815195, "grad_norm": 4.20550012588501, "learning_rate": 2.606849315068493e-07, "log_odds_chosen": 1.823476791381836, "log_odds_ratio": -0.3091004490852356, "logits/chosen": 1.219796895980835, "logits/rejected": 1.1994848251342773, "logps/chosen": -1.4882984161376953, "logps/rejected": -3.1603569984436035, "loss": 0.5448, "nll_loss": 0.5138749480247498, "rewards/accuracies": 1.0, "rewards/chosen": -0.148829847574234, "rewards/margins": 0.1672058403491974, "rewards/rejected": -0.3160356879234314, "step": 5397 }, { "epoch": 14.778918548939084, "grad_norm": 5.381524085998535, "learning_rate": 2.6054794520547944e-07, "log_odds_chosen": 3.2210004329681396, "log_odds_ratio": -0.22697973251342773, "logits/chosen": 1.0637681484222412, "logits/rejected": 0.969674289226532, "logps/chosen": -1.810835838317871, "logps/rejected": -4.857872009277344, "loss": 0.5738, "nll_loss": 0.5510989427566528, "rewards/accuracies": 1.0, "rewards/chosen": -0.1810835897922516, "rewards/margins": 0.30470362305641174, "rewards/rejected": -0.48578718304634094, "step": 5398 }, { "epoch": 14.781656399726215, "grad_norm": 3.9237356185913086, "learning_rate": 2.604109589041096e-07, "log_odds_chosen": 3.3475069999694824, "log_odds_ratio": -0.12976635992527008, "logits/chosen": 1.0101853609085083, "logits/rejected": 0.992125928401947, "logps/chosen": -1.4129080772399902, "logps/rejected": -4.50022029876709, "loss": 0.5588, "nll_loss": 0.5458266735076904, "rewards/accuracies": 1.0, "rewards/chosen": -0.1412908136844635, "rewards/margins": 0.30873122811317444, "rewards/rejected": -0.45002204179763794, "step": 5399 }, { "epoch": 14.784394250513348, "grad_norm": 6.184774875640869, "learning_rate": 2.602739726027397e-07, "log_odds_chosen": 1.3747448921203613, "log_odds_ratio": -0.46411705017089844, "logits/chosen": 1.1139845848083496, "logits/rejected": 1.1475670337677002, "logps/chosen": -2.801237106323242, "logps/rejected": -4.0572829246521, "loss": 0.62, "nll_loss": 0.5736289620399475, "rewards/accuracies": 0.75, "rewards/chosen": -0.2801237106323242, "rewards/margins": 0.12560462951660156, "rewards/rejected": -0.4057283401489258, "step": 5400 }, { "epoch": 14.787132101300479, "grad_norm": 5.499433517456055, "learning_rate": 2.601369863013699e-07, "log_odds_chosen": 2.2393901348114014, "log_odds_ratio": -0.5709906816482544, "logits/chosen": 1.1169719696044922, "logits/rejected": 1.2579283714294434, "logps/chosen": -2.274301528930664, "logps/rejected": -4.423408031463623, "loss": 0.5683, "nll_loss": 0.5111683011054993, "rewards/accuracies": 0.875, "rewards/chosen": -0.22743014991283417, "rewards/margins": 0.214910626411438, "rewards/rejected": -0.44234079122543335, "step": 5401 }, { "epoch": 14.789869952087612, "grad_norm": 3.9678657054901123, "learning_rate": 2.6e-07, "log_odds_chosen": 1.8188259601593018, "log_odds_ratio": -0.3463567793369293, "logits/chosen": 1.1722385883331299, "logits/rejected": 1.1324832439422607, "logps/chosen": -2.552814245223999, "logps/rejected": -4.276803970336914, "loss": 0.5019, "nll_loss": 0.4672260582447052, "rewards/accuracies": 0.875, "rewards/chosen": -0.2552814185619354, "rewards/margins": 0.17239893972873688, "rewards/rejected": -0.4276803731918335, "step": 5402 }, { "epoch": 14.792607802874743, "grad_norm": 7.03275203704834, "learning_rate": 2.598630136986301e-07, "log_odds_chosen": 1.479472279548645, "log_odds_ratio": -0.573870837688446, "logits/chosen": 0.9603729248046875, "logits/rejected": 0.9475969076156616, "logps/chosen": -2.6449146270751953, "logps/rejected": -4.014369964599609, "loss": 0.5895, "nll_loss": 0.5321076512336731, "rewards/accuracies": 0.875, "rewards/chosen": -0.264491468667984, "rewards/margins": 0.13694551587104797, "rewards/rejected": -0.401436984539032, "step": 5403 }, { "epoch": 14.795345653661876, "grad_norm": 4.282849311828613, "learning_rate": 2.5972602739726025e-07, "log_odds_chosen": 2.582676649093628, "log_odds_ratio": -0.24229885637760162, "logits/chosen": 1.0268452167510986, "logits/rejected": 0.9868624806404114, "logps/chosen": -1.8254034519195557, "logps/rejected": -4.256000995635986, "loss": 0.6151, "nll_loss": 0.5908287167549133, "rewards/accuracies": 1.0, "rewards/chosen": -0.18254034221172333, "rewards/margins": 0.24305975437164307, "rewards/rejected": -0.4256001114845276, "step": 5404 }, { "epoch": 14.798083504449007, "grad_norm": 4.820727825164795, "learning_rate": 2.595890410958904e-07, "log_odds_chosen": 2.2893247604370117, "log_odds_ratio": -0.23038001358509064, "logits/chosen": 1.0036200284957886, "logits/rejected": 0.9585708975791931, "logps/chosen": -1.6084022521972656, "logps/rejected": -3.4723401069641113, "loss": 0.522, "nll_loss": 0.4990079998970032, "rewards/accuracies": 0.875, "rewards/chosen": -0.1608402281999588, "rewards/margins": 0.18639379739761353, "rewards/rejected": -0.34723401069641113, "step": 5405 }, { "epoch": 14.80082135523614, "grad_norm": 3.317181348800659, "learning_rate": 2.5945205479452055e-07, "log_odds_chosen": 1.8041154146194458, "log_odds_ratio": -0.2295989990234375, "logits/chosen": 1.226765751838684, "logits/rejected": 1.126577615737915, "logps/chosen": -1.294455647468567, "logps/rejected": -2.8566880226135254, "loss": 0.3998, "nll_loss": 0.3768812417984009, "rewards/accuracies": 1.0, "rewards/chosen": -0.12944556772708893, "rewards/margins": 0.15622322261333466, "rewards/rejected": -0.2856687903404236, "step": 5406 }, { "epoch": 14.803559206023271, "grad_norm": 3.65450119972229, "learning_rate": 2.5931506849315065e-07, "log_odds_chosen": 1.5180675983428955, "log_odds_ratio": -0.24121946096420288, "logits/chosen": 1.0859460830688477, "logits/rejected": 1.0428705215454102, "logps/chosen": -1.7566373348236084, "logps/rejected": -3.110729932785034, "loss": 0.4522, "nll_loss": 0.4280884861946106, "rewards/accuracies": 1.0, "rewards/chosen": -0.17566373944282532, "rewards/margins": 0.13540926575660706, "rewards/rejected": -0.3110730051994324, "step": 5407 }, { "epoch": 14.806297056810404, "grad_norm": 4.900009632110596, "learning_rate": 2.5917808219178086e-07, "log_odds_chosen": 1.6221076250076294, "log_odds_ratio": -0.35469290614128113, "logits/chosen": 1.086401343345642, "logits/rejected": 1.0161861181259155, "logps/chosen": -1.3083839416503906, "logps/rejected": -2.7411348819732666, "loss": 0.4772, "nll_loss": 0.4417637586593628, "rewards/accuracies": 0.75, "rewards/chosen": -0.13083839416503906, "rewards/margins": 0.14327508211135864, "rewards/rejected": -0.2741134762763977, "step": 5408 }, { "epoch": 14.809034907597535, "grad_norm": 5.762182712554932, "learning_rate": 2.5904109589041096e-07, "log_odds_chosen": 3.5581541061401367, "log_odds_ratio": -0.15031105279922485, "logits/chosen": 1.2373124361038208, "logits/rejected": 1.1451969146728516, "logps/chosen": -1.8529990911483765, "logps/rejected": -5.2107462882995605, "loss": 0.5794, "nll_loss": 0.5643770098686218, "rewards/accuracies": 1.0, "rewards/chosen": -0.18529990315437317, "rewards/margins": 0.3357747197151184, "rewards/rejected": -0.521074652671814, "step": 5409 }, { "epoch": 14.811772758384668, "grad_norm": 13.521434783935547, "learning_rate": 2.5890410958904106e-07, "log_odds_chosen": 1.7610961198806763, "log_odds_ratio": -0.4605691432952881, "logits/chosen": 1.5415138006210327, "logits/rejected": 1.5103847980499268, "logps/chosen": -2.177999258041382, "logps/rejected": -3.5647151470184326, "loss": 0.5949, "nll_loss": 0.5488008260726929, "rewards/accuracies": 0.75, "rewards/chosen": -0.21779993176460266, "rewards/margins": 0.13867157697677612, "rewards/rejected": -0.3564715087413788, "step": 5410 }, { "epoch": 14.8145106091718, "grad_norm": 3.7184221744537354, "learning_rate": 2.587671232876712e-07, "log_odds_chosen": 2.6812312602996826, "log_odds_ratio": -0.17487388849258423, "logits/chosen": 1.1232085227966309, "logits/rejected": 1.2160961627960205, "logps/chosen": -1.8968605995178223, "logps/rejected": -4.372478008270264, "loss": 0.5247, "nll_loss": 0.5072055459022522, "rewards/accuracies": 1.0, "rewards/chosen": -0.18968607485294342, "rewards/margins": 0.24756170809268951, "rewards/rejected": -0.43724775314331055, "step": 5411 }, { "epoch": 14.817248459958932, "grad_norm": 3.7591328620910645, "learning_rate": 2.5863013698630136e-07, "log_odds_chosen": 3.2766952514648438, "log_odds_ratio": -0.11564076691865921, "logits/chosen": 0.7129406929016113, "logits/rejected": 0.6391336917877197, "logps/chosen": -1.293023943901062, "logps/rejected": -4.243288040161133, "loss": 0.6514, "nll_loss": 0.6398802995681763, "rewards/accuracies": 1.0, "rewards/chosen": -0.12930241227149963, "rewards/margins": 0.29502636194229126, "rewards/rejected": -0.4243287742137909, "step": 5412 }, { "epoch": 14.819986310746064, "grad_norm": 4.226625919342041, "learning_rate": 2.584931506849315e-07, "log_odds_chosen": 2.043900966644287, "log_odds_ratio": -0.15330463647842407, "logits/chosen": 1.3040775060653687, "logits/rejected": 1.317878246307373, "logps/chosen": -2.0723166465759277, "logps/rejected": -3.9589946269989014, "loss": 0.5157, "nll_loss": 0.50038081407547, "rewards/accuracies": 1.0, "rewards/chosen": -0.20723168551921844, "rewards/margins": 0.18866777420043945, "rewards/rejected": -0.3958994746208191, "step": 5413 }, { "epoch": 14.822724161533197, "grad_norm": 5.617927074432373, "learning_rate": 2.583561643835616e-07, "log_odds_chosen": 1.7361363172531128, "log_odds_ratio": -0.2634698152542114, "logits/chosen": 1.065358281135559, "logits/rejected": 1.0778058767318726, "logps/chosen": -2.141209125518799, "logps/rejected": -3.6925971508026123, "loss": 0.4816, "nll_loss": 0.45521461963653564, "rewards/accuracies": 0.875, "rewards/chosen": -0.21412090957164764, "rewards/margins": 0.1551388055086136, "rewards/rejected": -0.36925971508026123, "step": 5414 }, { "epoch": 14.825462012320328, "grad_norm": 3.587829113006592, "learning_rate": 2.582191780821918e-07, "log_odds_chosen": 3.60060715675354, "log_odds_ratio": -0.24018029868602753, "logits/chosen": 1.4538352489471436, "logits/rejected": 1.3970386981964111, "logps/chosen": -1.940565586090088, "logps/rejected": -5.403103828430176, "loss": 0.4606, "nll_loss": 0.4365447163581848, "rewards/accuracies": 1.0, "rewards/chosen": -0.19405657052993774, "rewards/margins": 0.34625381231307983, "rewards/rejected": -0.5403103828430176, "step": 5415 }, { "epoch": 14.82819986310746, "grad_norm": 3.579850673675537, "learning_rate": 2.580821917808219e-07, "log_odds_chosen": 2.6505980491638184, "log_odds_ratio": -0.27065393328666687, "logits/chosen": 0.9057928919792175, "logits/rejected": 0.82564377784729, "logps/chosen": -1.5526444911956787, "logps/rejected": -4.0349836349487305, "loss": 0.465, "nll_loss": 0.43796306848526, "rewards/accuracies": 0.875, "rewards/chosen": -0.1552644670009613, "rewards/margins": 0.24823391437530518, "rewards/rejected": -0.4034983813762665, "step": 5416 }, { "epoch": 14.830937713894592, "grad_norm": 4.859841346740723, "learning_rate": 2.57945205479452e-07, "log_odds_chosen": 3.0870437622070312, "log_odds_ratio": -0.26838627457618713, "logits/chosen": 1.110019326210022, "logits/rejected": 1.0612589120864868, "logps/chosen": -2.0974388122558594, "logps/rejected": -4.967877388000488, "loss": 0.6467, "nll_loss": 0.6198447346687317, "rewards/accuracies": 0.75, "rewards/chosen": -0.20974388718605042, "rewards/margins": 0.28704386949539185, "rewards/rejected": -0.49678778648376465, "step": 5417 }, { "epoch": 14.833675564681725, "grad_norm": 3.7551560401916504, "learning_rate": 2.5780821917808217e-07, "log_odds_chosen": 4.135693073272705, "log_odds_ratio": -0.19516265392303467, "logits/chosen": 1.1894183158874512, "logits/rejected": 1.2458882331848145, "logps/chosen": -2.0024094581604004, "logps/rejected": -5.934874057769775, "loss": 0.5354, "nll_loss": 0.5158730149269104, "rewards/accuracies": 0.875, "rewards/chosen": -0.20024093985557556, "rewards/margins": 0.39324647188186646, "rewards/rejected": -0.5934873819351196, "step": 5418 }, { "epoch": 14.836413415468858, "grad_norm": 3.6922223567962646, "learning_rate": 2.576712328767123e-07, "log_odds_chosen": 5.294623851776123, "log_odds_ratio": -0.11671346426010132, "logits/chosen": 1.0479224920272827, "logits/rejected": 1.0420162677764893, "logps/chosen": -2.3334810733795166, "logps/rejected": -7.486800670623779, "loss": 0.6064, "nll_loss": 0.5947408676147461, "rewards/accuracies": 1.0, "rewards/chosen": -0.23334811627864838, "rewards/margins": 0.515332043170929, "rewards/rejected": -0.7486801147460938, "step": 5419 }, { "epoch": 14.839151266255989, "grad_norm": 7.0229058265686035, "learning_rate": 2.5753424657534247e-07, "log_odds_chosen": 2.2607533931732178, "log_odds_ratio": -0.3204273581504822, "logits/chosen": 1.091673731803894, "logits/rejected": 1.0583078861236572, "logps/chosen": -1.8152613639831543, "logps/rejected": -3.907170295715332, "loss": 0.5451, "nll_loss": 0.5130730271339417, "rewards/accuracies": 0.75, "rewards/chosen": -0.18152613937854767, "rewards/margins": 0.20919087529182434, "rewards/rejected": -0.3907170295715332, "step": 5420 }, { "epoch": 14.841889117043122, "grad_norm": 4.046229839324951, "learning_rate": 2.5739726027397257e-07, "log_odds_chosen": 2.924131155014038, "log_odds_ratio": -0.35681992769241333, "logits/chosen": 1.1002286672592163, "logits/rejected": 1.0565412044525146, "logps/chosen": -1.715269684791565, "logps/rejected": -4.479304790496826, "loss": 0.51, "nll_loss": 0.4742872416973114, "rewards/accuracies": 0.75, "rewards/chosen": -0.1715269535779953, "rewards/margins": 0.2764035761356354, "rewards/rejected": -0.4479305148124695, "step": 5421 }, { "epoch": 14.844626967830253, "grad_norm": 4.6123456954956055, "learning_rate": 2.572602739726027e-07, "log_odds_chosen": 2.1218373775482178, "log_odds_ratio": -0.1932174563407898, "logits/chosen": 0.9544826745986938, "logits/rejected": 0.8993003964424133, "logps/chosen": -1.8105734586715698, "logps/rejected": -3.743701457977295, "loss": 0.5277, "nll_loss": 0.508338451385498, "rewards/accuracies": 1.0, "rewards/chosen": -0.18105734884738922, "rewards/margins": 0.19331279397010803, "rewards/rejected": -0.37437015771865845, "step": 5422 }, { "epoch": 14.847364818617386, "grad_norm": 4.860469341278076, "learning_rate": 2.571232876712329e-07, "log_odds_chosen": 2.421107769012451, "log_odds_ratio": -0.30893418192863464, "logits/chosen": 1.213786005973816, "logits/rejected": 1.191631555557251, "logps/chosen": -2.1614718437194824, "logps/rejected": -4.43595027923584, "loss": 0.5642, "nll_loss": 0.5333384275436401, "rewards/accuracies": 0.875, "rewards/chosen": -0.21614718437194824, "rewards/margins": 0.22744785249233246, "rewards/rejected": -0.4435950517654419, "step": 5423 }, { "epoch": 14.850102669404517, "grad_norm": 4.048600673675537, "learning_rate": 2.56986301369863e-07, "log_odds_chosen": 1.7247542142868042, "log_odds_ratio": -0.2176045924425125, "logits/chosen": 0.9313275814056396, "logits/rejected": 0.9068983197212219, "logps/chosen": -1.824366807937622, "logps/rejected": -3.345612049102783, "loss": 0.553, "nll_loss": 0.5312204360961914, "rewards/accuracies": 1.0, "rewards/chosen": -0.18243670463562012, "rewards/margins": 0.1521245241165161, "rewards/rejected": -0.33456122875213623, "step": 5424 }, { "epoch": 14.85284052019165, "grad_norm": 7.281833171844482, "learning_rate": 2.568493150684932e-07, "log_odds_chosen": 3.8320789337158203, "log_odds_ratio": -0.15999239683151245, "logits/chosen": 1.1430468559265137, "logits/rejected": 1.1793451309204102, "logps/chosen": -2.3929615020751953, "logps/rejected": -6.059770584106445, "loss": 0.6416, "nll_loss": 0.6256113052368164, "rewards/accuracies": 1.0, "rewards/chosen": -0.23929616808891296, "rewards/margins": 0.3666808605194092, "rewards/rejected": -0.6059770584106445, "step": 5425 }, { "epoch": 14.855578370978781, "grad_norm": 4.9783430099487305, "learning_rate": 2.567123287671233e-07, "log_odds_chosen": 0.7893508672714233, "log_odds_ratio": -0.6266329288482666, "logits/chosen": 1.1410309076309204, "logits/rejected": 1.1523244380950928, "logps/chosen": -2.273099422454834, "logps/rejected": -3.005983591079712, "loss": 0.5221, "nll_loss": 0.45943477749824524, "rewards/accuracies": 0.625, "rewards/chosen": -0.2273099273443222, "rewards/margins": 0.07328841835260391, "rewards/rejected": -0.3005983829498291, "step": 5426 }, { "epoch": 14.858316221765914, "grad_norm": 3.395045518875122, "learning_rate": 2.5657534246575343e-07, "log_odds_chosen": 2.8523616790771484, "log_odds_ratio": -0.16351786255836487, "logits/chosen": 0.9928357601165771, "logits/rejected": 1.0066163539886475, "logps/chosen": -1.7148982286453247, "logps/rejected": -4.349000930786133, "loss": 0.6346, "nll_loss": 0.6182878017425537, "rewards/accuracies": 1.0, "rewards/chosen": -0.17148983478546143, "rewards/margins": 0.2634102702140808, "rewards/rejected": -0.43490010499954224, "step": 5427 }, { "epoch": 14.861054072553046, "grad_norm": 3.617218017578125, "learning_rate": 2.5643835616438353e-07, "log_odds_chosen": 3.183987617492676, "log_odds_ratio": -0.16651010513305664, "logits/chosen": 1.0453323125839233, "logits/rejected": 1.0268357992172241, "logps/chosen": -2.4685678482055664, "logps/rejected": -5.483935356140137, "loss": 0.57, "nll_loss": 0.5533967614173889, "rewards/accuracies": 1.0, "rewards/chosen": -0.24685677886009216, "rewards/margins": 0.3015367388725281, "rewards/rejected": -0.5483935475349426, "step": 5428 }, { "epoch": 14.863791923340179, "grad_norm": 3.4910387992858887, "learning_rate": 2.563013698630137e-07, "log_odds_chosen": 2.174309492111206, "log_odds_ratio": -0.2705463767051697, "logits/chosen": 1.177198886871338, "logits/rejected": 1.1719958782196045, "logps/chosen": -1.5686500072479248, "logps/rejected": -3.601799249649048, "loss": 0.4733, "nll_loss": 0.4462810754776001, "rewards/accuracies": 1.0, "rewards/chosen": -0.15686501562595367, "rewards/margins": 0.20331493020057678, "rewards/rejected": -0.36017993092536926, "step": 5429 }, { "epoch": 14.86652977412731, "grad_norm": 3.7877893447875977, "learning_rate": 2.5616438356164383e-07, "log_odds_chosen": 3.981064558029175, "log_odds_ratio": -0.17956259846687317, "logits/chosen": 1.0851311683654785, "logits/rejected": 1.0339300632476807, "logps/chosen": -1.8235788345336914, "logps/rejected": -5.591502666473389, "loss": 0.556, "nll_loss": 0.5380120277404785, "rewards/accuracies": 1.0, "rewards/chosen": -0.18235787749290466, "rewards/margins": 0.37679237127304077, "rewards/rejected": -0.5591502785682678, "step": 5430 }, { "epoch": 14.869267624914443, "grad_norm": 3.3595950603485107, "learning_rate": 2.5602739726027393e-07, "log_odds_chosen": 2.290741205215454, "log_odds_ratio": -0.16970205307006836, "logits/chosen": 1.0776252746582031, "logits/rejected": 1.0709741115570068, "logps/chosen": -1.8403912782669067, "logps/rejected": -3.970303535461426, "loss": 0.468, "nll_loss": 0.45099958777427673, "rewards/accuracies": 1.0, "rewards/chosen": -0.1840391308069229, "rewards/margins": 0.21299120783805847, "rewards/rejected": -0.3970303535461426, "step": 5431 }, { "epoch": 14.872005475701574, "grad_norm": 6.452320575714111, "learning_rate": 2.5589041095890414e-07, "log_odds_chosen": 1.8096264600753784, "log_odds_ratio": -0.39659446477890015, "logits/chosen": 1.2171756029129028, "logits/rejected": 1.1580843925476074, "logps/chosen": -2.241941452026367, "logps/rejected": -3.912433624267578, "loss": 0.5555, "nll_loss": 0.515884280204773, "rewards/accuracies": 0.75, "rewards/chosen": -0.22419416904449463, "rewards/margins": 0.16704922914505005, "rewards/rejected": -0.3912433683872223, "step": 5432 }, { "epoch": 14.874743326488707, "grad_norm": 3.6092605590820312, "learning_rate": 2.5575342465753424e-07, "log_odds_chosen": 2.848642349243164, "log_odds_ratio": -0.13158351182937622, "logits/chosen": 1.1565749645233154, "logits/rejected": 1.1724258661270142, "logps/chosen": -1.7140119075775146, "logps/rejected": -4.350465774536133, "loss": 0.5208, "nll_loss": 0.5076184868812561, "rewards/accuracies": 1.0, "rewards/chosen": -0.17140117287635803, "rewards/margins": 0.26364538073539734, "rewards/rejected": -0.43504655361175537, "step": 5433 }, { "epoch": 14.877481177275838, "grad_norm": 4.282359600067139, "learning_rate": 2.556164383561644e-07, "log_odds_chosen": 2.8705897331237793, "log_odds_ratio": -0.19167187809944153, "logits/chosen": 1.102962613105774, "logits/rejected": 1.1762572526931763, "logps/chosen": -2.243795394897461, "logps/rejected": -4.983421802520752, "loss": 0.6704, "nll_loss": 0.6511999368667603, "rewards/accuracies": 0.875, "rewards/chosen": -0.2243795394897461, "rewards/margins": 0.2739626467227936, "rewards/rejected": -0.4983421862125397, "step": 5434 }, { "epoch": 14.880219028062971, "grad_norm": 4.2084856033325195, "learning_rate": 2.554794520547945e-07, "log_odds_chosen": 3.1892623901367188, "log_odds_ratio": -0.2362474799156189, "logits/chosen": 1.2673910856246948, "logits/rejected": 1.3195754289627075, "logps/chosen": -2.435896396636963, "logps/rejected": -5.512195110321045, "loss": 0.6075, "nll_loss": 0.5838273167610168, "rewards/accuracies": 0.875, "rewards/chosen": -0.2435896396636963, "rewards/margins": 0.30762994289398193, "rewards/rejected": -0.5512195229530334, "step": 5435 }, { "epoch": 14.882956878850102, "grad_norm": 3.97273325920105, "learning_rate": 2.5534246575342464e-07, "log_odds_chosen": 1.8475111722946167, "log_odds_ratio": -0.2829611301422119, "logits/chosen": 1.1307765245437622, "logits/rejected": 1.040000557899475, "logps/chosen": -1.244742512702942, "logps/rejected": -2.874542236328125, "loss": 0.4931, "nll_loss": 0.46482810378074646, "rewards/accuracies": 1.0, "rewards/chosen": -0.12447424978017807, "rewards/margins": 0.16297996044158936, "rewards/rejected": -0.287454217672348, "step": 5436 }, { "epoch": 14.885694729637235, "grad_norm": 10.854296684265137, "learning_rate": 2.552054794520548e-07, "log_odds_chosen": 2.9400041103363037, "log_odds_ratio": -0.4365687370300293, "logits/chosen": 1.1258704662322998, "logits/rejected": 1.1338205337524414, "logps/chosen": -2.599881172180176, "logps/rejected": -5.366647720336914, "loss": 0.5365, "nll_loss": 0.49284738302230835, "rewards/accuracies": 0.875, "rewards/chosen": -0.25998809933662415, "rewards/margins": 0.27667665481567383, "rewards/rejected": -0.5366647839546204, "step": 5437 }, { "epoch": 14.888432580424366, "grad_norm": 3.5290145874023438, "learning_rate": 2.550684931506849e-07, "log_odds_chosen": 3.434749126434326, "log_odds_ratio": -0.206953763961792, "logits/chosen": 1.1512283086776733, "logits/rejected": 1.1107257604599, "logps/chosen": -1.8056867122650146, "logps/rejected": -5.090081214904785, "loss": 0.5362, "nll_loss": 0.5154817700386047, "rewards/accuracies": 1.0, "rewards/chosen": -0.180568665266037, "rewards/margins": 0.3284394145011902, "rewards/rejected": -0.5090081095695496, "step": 5438 }, { "epoch": 14.8911704312115, "grad_norm": 3.797696590423584, "learning_rate": 2.549315068493151e-07, "log_odds_chosen": 1.9827053546905518, "log_odds_ratio": -0.22164154052734375, "logits/chosen": 0.8510321974754333, "logits/rejected": 0.7594021558761597, "logps/chosen": -1.4813683032989502, "logps/rejected": -3.257228374481201, "loss": 0.464, "nll_loss": 0.4418580234050751, "rewards/accuracies": 1.0, "rewards/chosen": -0.14813685417175293, "rewards/margins": 0.17758597433567047, "rewards/rejected": -0.3257228434085846, "step": 5439 }, { "epoch": 14.89390828199863, "grad_norm": 6.398045063018799, "learning_rate": 2.547945205479452e-07, "log_odds_chosen": 2.24290132522583, "log_odds_ratio": -0.41161802411079407, "logits/chosen": 1.2082910537719727, "logits/rejected": 1.21329665184021, "logps/chosen": -2.805419683456421, "logps/rejected": -5.00543212890625, "loss": 0.6103, "nll_loss": 0.5691531300544739, "rewards/accuracies": 0.75, "rewards/chosen": -0.28054195642471313, "rewards/margins": 0.2200012356042862, "rewards/rejected": -0.5005431771278381, "step": 5440 }, { "epoch": 14.896646132785763, "grad_norm": 3.6441540718078613, "learning_rate": 2.5465753424657535e-07, "log_odds_chosen": 3.4054179191589355, "log_odds_ratio": -0.19614583253860474, "logits/chosen": 0.8515703678131104, "logits/rejected": 0.8658226132392883, "logps/chosen": -1.8452378511428833, "logps/rejected": -5.069020748138428, "loss": 0.6107, "nll_loss": 0.5911047458648682, "rewards/accuracies": 0.875, "rewards/chosen": -0.1845237910747528, "rewards/margins": 0.3223782777786255, "rewards/rejected": -0.5069020986557007, "step": 5441 }, { "epoch": 14.899383983572895, "grad_norm": 3.9476511478424072, "learning_rate": 2.5452054794520545e-07, "log_odds_chosen": 1.56573486328125, "log_odds_ratio": -0.43786072731018066, "logits/chosen": 1.1855978965759277, "logits/rejected": 1.195630431175232, "logps/chosen": -1.980799913406372, "logps/rejected": -3.426913022994995, "loss": 0.6943, "nll_loss": 0.6505259871482849, "rewards/accuracies": 0.75, "rewards/chosen": -0.19808000326156616, "rewards/margins": 0.14461131393909454, "rewards/rejected": -0.3426912724971771, "step": 5442 }, { "epoch": 14.902121834360027, "grad_norm": 4.169459819793701, "learning_rate": 2.543835616438356e-07, "log_odds_chosen": 2.9085991382598877, "log_odds_ratio": -0.25734472274780273, "logits/chosen": 1.1190600395202637, "logits/rejected": 1.1329838037490845, "logps/chosen": -1.7259292602539062, "logps/rejected": -4.500861644744873, "loss": 0.4959, "nll_loss": 0.4701332449913025, "rewards/accuracies": 0.875, "rewards/chosen": -0.17259293794631958, "rewards/margins": 0.2774932384490967, "rewards/rejected": -0.45008620619773865, "step": 5443 }, { "epoch": 14.904859685147159, "grad_norm": 5.611315727233887, "learning_rate": 2.5424657534246575e-07, "log_odds_chosen": 3.810134172439575, "log_odds_ratio": -0.1006806492805481, "logits/chosen": 1.1997774839401245, "logits/rejected": 1.1787652969360352, "logps/chosen": -2.344057321548462, "logps/rejected": -6.001861572265625, "loss": 0.6974, "nll_loss": 0.6873308420181274, "rewards/accuracies": 1.0, "rewards/chosen": -0.2344057261943817, "rewards/margins": 0.36578041315078735, "rewards/rejected": -0.6001861691474915, "step": 5444 }, { "epoch": 14.907597535934292, "grad_norm": 6.885862827301025, "learning_rate": 2.5410958904109585e-07, "log_odds_chosen": 2.719937324523926, "log_odds_ratio": -0.4141303300857544, "logits/chosen": 0.99775630235672, "logits/rejected": 0.989119827747345, "logps/chosen": -2.4273648262023926, "logps/rejected": -5.079292297363281, "loss": 0.732, "nll_loss": 0.690598726272583, "rewards/accuracies": 0.75, "rewards/chosen": -0.24273647367954254, "rewards/margins": 0.26519280672073364, "rewards/rejected": -0.507929265499115, "step": 5445 }, { "epoch": 14.910335386721425, "grad_norm": 4.457182884216309, "learning_rate": 2.5397260273972605e-07, "log_odds_chosen": 3.269270420074463, "log_odds_ratio": -0.2448655515909195, "logits/chosen": 1.002856731414795, "logits/rejected": 0.9581298828125, "logps/chosen": -1.9900188446044922, "logps/rejected": -5.066340923309326, "loss": 0.6633, "nll_loss": 0.6387700438499451, "rewards/accuracies": 0.875, "rewards/chosen": -0.19900187849998474, "rewards/margins": 0.3076322376728058, "rewards/rejected": -0.5066341161727905, "step": 5446 }, { "epoch": 14.913073237508556, "grad_norm": 4.012293338775635, "learning_rate": 2.5383561643835615e-07, "log_odds_chosen": 6.2280378341674805, "log_odds_ratio": -0.07886940240859985, "logits/chosen": 1.0130372047424316, "logits/rejected": 1.0070760250091553, "logps/chosen": -1.1052719354629517, "logps/rejected": -6.8992390632629395, "loss": 0.5714, "nll_loss": 0.5635070204734802, "rewards/accuracies": 1.0, "rewards/chosen": -0.11052719503641129, "rewards/margins": 0.5793967247009277, "rewards/rejected": -0.689923882484436, "step": 5447 }, { "epoch": 14.915811088295689, "grad_norm": 8.798577308654785, "learning_rate": 2.536986301369863e-07, "log_odds_chosen": 1.717726469039917, "log_odds_ratio": -0.7513152956962585, "logits/chosen": 1.2885081768035889, "logits/rejected": 1.2530659437179565, "logps/chosen": -2.2417595386505127, "logps/rejected": -3.7342426776885986, "loss": 0.6038, "nll_loss": 0.5286825895309448, "rewards/accuracies": 0.75, "rewards/chosen": -0.22417597472667694, "rewards/margins": 0.14924833178520203, "rewards/rejected": -0.3734242916107178, "step": 5448 }, { "epoch": 14.91854893908282, "grad_norm": 3.507244825363159, "learning_rate": 2.535616438356164e-07, "log_odds_chosen": 3.1206958293914795, "log_odds_ratio": -0.1675933599472046, "logits/chosen": 1.1621603965759277, "logits/rejected": 1.1880183219909668, "logps/chosen": -1.5898168087005615, "logps/rejected": -4.423547267913818, "loss": 0.5333, "nll_loss": 0.5165224075317383, "rewards/accuracies": 1.0, "rewards/chosen": -0.15898168087005615, "rewards/margins": 0.28337302803993225, "rewards/rejected": -0.4423547387123108, "step": 5449 }, { "epoch": 14.921286789869953, "grad_norm": 4.342320442199707, "learning_rate": 2.5342465753424656e-07, "log_odds_chosen": 1.5834026336669922, "log_odds_ratio": -0.24787980318069458, "logits/chosen": 1.1278889179229736, "logits/rejected": 1.1660714149475098, "logps/chosen": -2.0492031574249268, "logps/rejected": -3.515279769897461, "loss": 0.4594, "nll_loss": 0.43464159965515137, "rewards/accuracies": 1.0, "rewards/chosen": -0.20492032170295715, "rewards/margins": 0.1466076821088791, "rewards/rejected": -0.35152798891067505, "step": 5450 }, { "epoch": 14.924024640657084, "grad_norm": 3.2358059883117676, "learning_rate": 2.532876712328767e-07, "log_odds_chosen": 2.0335192680358887, "log_odds_ratio": -0.2802799940109253, "logits/chosen": 1.2560230493545532, "logits/rejected": 1.2479395866394043, "logps/chosen": -2.0102248191833496, "logps/rejected": -3.925039768218994, "loss": 0.4815, "nll_loss": 0.45350614190101624, "rewards/accuracies": 0.75, "rewards/chosen": -0.20102247595787048, "rewards/margins": 0.19148151576519012, "rewards/rejected": -0.3925040066242218, "step": 5451 }, { "epoch": 14.926762491444217, "grad_norm": 3.466521978378296, "learning_rate": 2.531506849315068e-07, "log_odds_chosen": 2.8643321990966797, "log_odds_ratio": -0.20448638498783112, "logits/chosen": 1.1874581575393677, "logits/rejected": 1.1699275970458984, "logps/chosen": -1.6503548622131348, "logps/rejected": -4.32115364074707, "loss": 0.6236, "nll_loss": 0.6031625270843506, "rewards/accuracies": 0.875, "rewards/chosen": -0.16503548622131348, "rewards/margins": 0.2670798599720001, "rewards/rejected": -0.4321153163909912, "step": 5452 }, { "epoch": 14.929500342231348, "grad_norm": 3.050072431564331, "learning_rate": 2.53013698630137e-07, "log_odds_chosen": 3.0499424934387207, "log_odds_ratio": -0.16154834628105164, "logits/chosen": 1.2554748058319092, "logits/rejected": 1.2986862659454346, "logps/chosen": -1.6710317134857178, "logps/rejected": -4.516857147216797, "loss": 0.4227, "nll_loss": 0.40659454464912415, "rewards/accuracies": 1.0, "rewards/chosen": -0.16710317134857178, "rewards/margins": 0.28458258509635925, "rewards/rejected": -0.45168575644493103, "step": 5453 }, { "epoch": 14.932238193018481, "grad_norm": 3.9033751487731934, "learning_rate": 2.528767123287671e-07, "log_odds_chosen": 4.285573959350586, "log_odds_ratio": -0.23708941042423248, "logits/chosen": 1.2599564790725708, "logits/rejected": 1.2586816549301147, "logps/chosen": -2.0054807662963867, "logps/rejected": -6.138855934143066, "loss": 0.4788, "nll_loss": 0.4550890326499939, "rewards/accuracies": 0.875, "rewards/chosen": -0.20054806768894196, "rewards/margins": 0.4133375287055969, "rewards/rejected": -0.6138855814933777, "step": 5454 }, { "epoch": 14.934976043805612, "grad_norm": 7.550199031829834, "learning_rate": 2.527397260273972e-07, "log_odds_chosen": 2.6464760303497314, "log_odds_ratio": -0.2898440659046173, "logits/chosen": 0.9808285236358643, "logits/rejected": 0.8882544040679932, "logps/chosen": -2.466582775115967, "logps/rejected": -4.969864845275879, "loss": 0.631, "nll_loss": 0.602053165435791, "rewards/accuracies": 0.875, "rewards/chosen": -0.24665828049182892, "rewards/margins": 0.2503281533718109, "rewards/rejected": -0.496986448764801, "step": 5455 }, { "epoch": 14.937713894592745, "grad_norm": 11.848106384277344, "learning_rate": 2.5260273972602736e-07, "log_odds_chosen": 1.5329464673995972, "log_odds_ratio": -0.704109251499176, "logits/chosen": 1.194930076599121, "logits/rejected": 1.167508602142334, "logps/chosen": -2.5466561317443848, "logps/rejected": -4.034692287445068, "loss": 0.6623, "nll_loss": 0.5918929576873779, "rewards/accuracies": 0.625, "rewards/chosen": -0.2546656131744385, "rewards/margins": 0.14880363643169403, "rewards/rejected": -0.4034692645072937, "step": 5456 }, { "epoch": 14.940451745379876, "grad_norm": 3.4083657264709473, "learning_rate": 2.524657534246575e-07, "log_odds_chosen": 3.4827208518981934, "log_odds_ratio": -0.15795207023620605, "logits/chosen": 1.159111738204956, "logits/rejected": 1.1167546510696411, "logps/chosen": -1.6163355112075806, "logps/rejected": -4.72656774520874, "loss": 0.5301, "nll_loss": 0.5142779350280762, "rewards/accuracies": 1.0, "rewards/chosen": -0.16163355112075806, "rewards/margins": 0.3110232353210449, "rewards/rejected": -0.472656786441803, "step": 5457 }, { "epoch": 14.94318959616701, "grad_norm": 3.903658151626587, "learning_rate": 2.5232876712328767e-07, "log_odds_chosen": 1.7876110076904297, "log_odds_ratio": -0.2260974496603012, "logits/chosen": 1.0960975885391235, "logits/rejected": 1.0653502941131592, "logps/chosen": -1.299023151397705, "logps/rejected": -2.819002628326416, "loss": 0.3922, "nll_loss": 0.369606614112854, "rewards/accuracies": 1.0, "rewards/chosen": -0.12990230321884155, "rewards/margins": 0.15199793875217438, "rewards/rejected": -0.2819002568721771, "step": 5458 }, { "epoch": 14.94592744695414, "grad_norm": 4.146214962005615, "learning_rate": 2.5219178082191777e-07, "log_odds_chosen": 1.2249058485031128, "log_odds_ratio": -0.28925764560699463, "logits/chosen": 0.9143537282943726, "logits/rejected": 0.7975936532020569, "logps/chosen": -1.9426765441894531, "logps/rejected": -3.0123798847198486, "loss": 0.4839, "nll_loss": 0.45498126745224, "rewards/accuracies": 1.0, "rewards/chosen": -0.1942676603794098, "rewards/margins": 0.10697035491466522, "rewards/rejected": -0.3012380003929138, "step": 5459 }, { "epoch": 14.948665297741274, "grad_norm": 3.6634860038757324, "learning_rate": 2.5205479452054797e-07, "log_odds_chosen": 3.3466272354125977, "log_odds_ratio": -0.11702229082584381, "logits/chosen": 1.2182081937789917, "logits/rejected": 1.2148261070251465, "logps/chosen": -1.4692790508270264, "logps/rejected": -4.380059242248535, "loss": 0.5094, "nll_loss": 0.49770358204841614, "rewards/accuracies": 1.0, "rewards/chosen": -0.14692789316177368, "rewards/margins": 0.29107803106307983, "rewards/rejected": -0.4380059242248535, "step": 5460 }, { "epoch": 14.951403148528405, "grad_norm": 3.4501748085021973, "learning_rate": 2.5191780821917807e-07, "log_odds_chosen": 3.1981828212738037, "log_odds_ratio": -0.15179961919784546, "logits/chosen": 0.8654636740684509, "logits/rejected": 0.8228062391281128, "logps/chosen": -2.023463249206543, "logps/rejected": -5.066330909729004, "loss": 0.5925, "nll_loss": 0.5772721171379089, "rewards/accuracies": 1.0, "rewards/chosen": -0.20234635472297668, "rewards/margins": 0.30428674817085266, "rewards/rejected": -0.5066331028938293, "step": 5461 }, { "epoch": 14.954140999315538, "grad_norm": 5.1336517333984375, "learning_rate": 2.5178082191780817e-07, "log_odds_chosen": 2.2702784538269043, "log_odds_ratio": -0.27047255635261536, "logits/chosen": 1.012990951538086, "logits/rejected": 1.0150058269500732, "logps/chosen": -1.87455415725708, "logps/rejected": -3.9847140312194824, "loss": 0.5453, "nll_loss": 0.518281102180481, "rewards/accuracies": 0.875, "rewards/chosen": -0.187455415725708, "rewards/margins": 0.2110159993171692, "rewards/rejected": -0.3984714150428772, "step": 5462 }, { "epoch": 14.956878850102669, "grad_norm": 8.259989738464355, "learning_rate": 2.516438356164384e-07, "log_odds_chosen": 3.0428709983825684, "log_odds_ratio": -0.5727991461753845, "logits/chosen": 1.304900884628296, "logits/rejected": 1.3376214504241943, "logps/chosen": -3.0305802822113037, "logps/rejected": -5.895109176635742, "loss": 0.704, "nll_loss": 0.6467313170433044, "rewards/accuracies": 0.875, "rewards/chosen": -0.30305802822113037, "rewards/margins": 0.2864528298377991, "rewards/rejected": -0.5895108580589294, "step": 5463 }, { "epoch": 14.959616700889802, "grad_norm": 3.5426175594329834, "learning_rate": 2.515068493150685e-07, "log_odds_chosen": 1.91044282913208, "log_odds_ratio": -0.2661694288253784, "logits/chosen": 1.2555829286575317, "logits/rejected": 1.2521007061004639, "logps/chosen": -1.872489094734192, "logps/rejected": -3.6606812477111816, "loss": 0.5323, "nll_loss": 0.5056933760643005, "rewards/accuracies": 1.0, "rewards/chosen": -0.18724890053272247, "rewards/margins": 0.17881923913955688, "rewards/rejected": -0.36606812477111816, "step": 5464 }, { "epoch": 14.962354551676933, "grad_norm": 6.706659317016602, "learning_rate": 2.5136986301369863e-07, "log_odds_chosen": 2.8913214206695557, "log_odds_ratio": -0.22797930240631104, "logits/chosen": 1.1682924032211304, "logits/rejected": 1.0757540464401245, "logps/chosen": -1.5340741872787476, "logps/rejected": -4.208426475524902, "loss": 0.6277, "nll_loss": 0.6049423217773438, "rewards/accuracies": 0.875, "rewards/chosen": -0.15340742468833923, "rewards/margins": 0.2674352824687958, "rewards/rejected": -0.4208426773548126, "step": 5465 }, { "epoch": 14.965092402464066, "grad_norm": 5.707473278045654, "learning_rate": 2.512328767123287e-07, "log_odds_chosen": 3.6791460514068604, "log_odds_ratio": -0.1957486867904663, "logits/chosen": 1.1064794063568115, "logits/rejected": 1.1238038539886475, "logps/chosen": -1.9843648672103882, "logps/rejected": -5.454419136047363, "loss": 0.5203, "nll_loss": 0.5007650852203369, "rewards/accuracies": 0.875, "rewards/chosen": -0.19843649864196777, "rewards/margins": 0.3470054268836975, "rewards/rejected": -0.5454418659210205, "step": 5466 }, { "epoch": 14.967830253251197, "grad_norm": 3.4991157054901123, "learning_rate": 2.5109589041095893e-07, "log_odds_chosen": 2.7890727519989014, "log_odds_ratio": -0.12342512607574463, "logits/chosen": 1.095487356185913, "logits/rejected": 1.0486276149749756, "logps/chosen": -1.3756519556045532, "logps/rejected": -3.8856098651885986, "loss": 0.4552, "nll_loss": 0.442827045917511, "rewards/accuracies": 1.0, "rewards/chosen": -0.13756519556045532, "rewards/margins": 0.25099581480026245, "rewards/rejected": -0.3885609805583954, "step": 5467 }, { "epoch": 14.97056810403833, "grad_norm": 4.1107611656188965, "learning_rate": 2.5095890410958903e-07, "log_odds_chosen": 4.128880500793457, "log_odds_ratio": -0.1714145988225937, "logits/chosen": 1.0533902645111084, "logits/rejected": 1.036308765411377, "logps/chosen": -1.6382731199264526, "logps/rejected": -5.429410934448242, "loss": 0.5233, "nll_loss": 0.5061662793159485, "rewards/accuracies": 1.0, "rewards/chosen": -0.1638273000717163, "rewards/margins": 0.3791138231754303, "rewards/rejected": -0.542941153049469, "step": 5468 }, { "epoch": 14.973305954825461, "grad_norm": 4.447538375854492, "learning_rate": 2.5082191780821913e-07, "log_odds_chosen": 3.512974739074707, "log_odds_ratio": -0.15328222513198853, "logits/chosen": 0.9563356637954712, "logits/rejected": 0.9596730470657349, "logps/chosen": -1.9550670385360718, "logps/rejected": -5.256237983703613, "loss": 0.609, "nll_loss": 0.5937095880508423, "rewards/accuracies": 1.0, "rewards/chosen": -0.1955067217350006, "rewards/margins": 0.3301171064376831, "rewards/rejected": -0.5256237387657166, "step": 5469 }, { "epoch": 14.976043805612594, "grad_norm": 7.6676435470581055, "learning_rate": 2.5068493150684933e-07, "log_odds_chosen": 3.09552264213562, "log_odds_ratio": -0.43453678488731384, "logits/chosen": 1.0286935567855835, "logits/rejected": 1.0252506732940674, "logps/chosen": -2.516101360321045, "logps/rejected": -5.388329029083252, "loss": 0.6606, "nll_loss": 0.6171375513076782, "rewards/accuracies": 0.75, "rewards/chosen": -0.2516101598739624, "rewards/margins": 0.28722280263900757, "rewards/rejected": -0.5388329029083252, "step": 5470 }, { "epoch": 14.978781656399725, "grad_norm": 4.533636093139648, "learning_rate": 2.5054794520547943e-07, "log_odds_chosen": 1.4669561386108398, "log_odds_ratio": -0.30466020107269287, "logits/chosen": 1.0399281978607178, "logits/rejected": 0.981509268283844, "logps/chosen": -2.3781352043151855, "logps/rejected": -3.730912208557129, "loss": 0.5636, "nll_loss": 0.5331289768218994, "rewards/accuracies": 1.0, "rewards/chosen": -0.23781350255012512, "rewards/margins": 0.13527771830558777, "rewards/rejected": -0.3730912506580353, "step": 5471 }, { "epoch": 14.981519507186858, "grad_norm": 5.679701328277588, "learning_rate": 2.504109589041096e-07, "log_odds_chosen": 1.8051021099090576, "log_odds_ratio": -0.41219162940979004, "logits/chosen": 0.8880577683448792, "logits/rejected": 0.9197109341621399, "logps/chosen": -2.0178449153900146, "logps/rejected": -3.596555233001709, "loss": 0.5136, "nll_loss": 0.47239232063293457, "rewards/accuracies": 0.875, "rewards/chosen": -0.20178449153900146, "rewards/margins": 0.1578710377216339, "rewards/rejected": -0.359655499458313, "step": 5472 }, { "epoch": 14.984257357973991, "grad_norm": 3.577451467514038, "learning_rate": 2.502739726027397e-07, "log_odds_chosen": 2.726447820663452, "log_odds_ratio": -0.3157566785812378, "logits/chosen": 0.9321575164794922, "logits/rejected": 0.8421785831451416, "logps/chosen": -1.7240018844604492, "logps/rejected": -4.329716682434082, "loss": 0.5257, "nll_loss": 0.49411439895629883, "rewards/accuracies": 0.875, "rewards/chosen": -0.17240017652511597, "rewards/margins": 0.2605714797973633, "rewards/rejected": -0.43297162652015686, "step": 5473 }, { "epoch": 14.986995208761122, "grad_norm": 3.4675049781799316, "learning_rate": 2.501369863013699e-07, "log_odds_chosen": 3.4530344009399414, "log_odds_ratio": -0.14618659019470215, "logits/chosen": 0.8745982646942139, "logits/rejected": 0.872342586517334, "logps/chosen": -2.1523354053497314, "logps/rejected": -5.389063835144043, "loss": 0.4971, "nll_loss": 0.4824919104576111, "rewards/accuracies": 1.0, "rewards/chosen": -0.21523353457450867, "rewards/margins": 0.323672890663147, "rewards/rejected": -0.5389063954353333, "step": 5474 }, { "epoch": 14.989733059548255, "grad_norm": 4.525492191314697, "learning_rate": 2.5e-07, "log_odds_chosen": 1.788519263267517, "log_odds_ratio": -0.25301000475883484, "logits/chosen": 1.0485495328903198, "logits/rejected": 1.1377832889556885, "logps/chosen": -2.281661033630371, "logps/rejected": -4.002145290374756, "loss": 0.5619, "nll_loss": 0.5366314649581909, "rewards/accuracies": 0.875, "rewards/chosen": -0.2281661033630371, "rewards/margins": 0.17204846441745758, "rewards/rejected": -0.4002145826816559, "step": 5475 }, { "epoch": 14.992470910335387, "grad_norm": 5.844733238220215, "learning_rate": 2.4986301369863014e-07, "log_odds_chosen": 2.605973243713379, "log_odds_ratio": -0.3673670291900635, "logits/chosen": 1.191072702407837, "logits/rejected": 1.1246575117111206, "logps/chosen": -1.8057122230529785, "logps/rejected": -4.158483505249023, "loss": 0.5724, "nll_loss": 0.5356389284133911, "rewards/accuracies": 0.875, "rewards/chosen": -0.18057122826576233, "rewards/margins": 0.23527713119983673, "rewards/rejected": -0.41584834456443787, "step": 5476 }, { "epoch": 14.99520876112252, "grad_norm": 6.178341865539551, "learning_rate": 2.4972602739726024e-07, "log_odds_chosen": 1.1219415664672852, "log_odds_ratio": -0.3492635488510132, "logits/chosen": 1.1194275617599487, "logits/rejected": 1.0072851181030273, "logps/chosen": -1.9325941801071167, "logps/rejected": -2.8624014854431152, "loss": 0.4813, "nll_loss": 0.44640684127807617, "rewards/accuracies": 0.875, "rewards/chosen": -0.19325941801071167, "rewards/margins": 0.09298073500394821, "rewards/rejected": -0.2862401604652405, "step": 5477 }, { "epoch": 14.99794661190965, "grad_norm": 4.110276222229004, "learning_rate": 2.495890410958904e-07, "log_odds_chosen": 2.0583415031433105, "log_odds_ratio": -0.3452729284763336, "logits/chosen": 1.025759220123291, "logits/rejected": 0.8726418018341064, "logps/chosen": -2.3575046062469482, "logps/rejected": -4.254535675048828, "loss": 0.6003, "nll_loss": 0.5657832026481628, "rewards/accuracies": 0.875, "rewards/chosen": -0.2357504665851593, "rewards/margins": 0.189703106880188, "rewards/rejected": -0.4254536032676697, "step": 5478 }, { "epoch": 15.000684462696784, "grad_norm": 3.399653196334839, "learning_rate": 2.4945205479452054e-07, "log_odds_chosen": 3.878628730773926, "log_odds_ratio": -0.17016342282295227, "logits/chosen": 0.9863134622573853, "logits/rejected": 0.9430090188980103, "logps/chosen": -1.492409348487854, "logps/rejected": -5.13645601272583, "loss": 0.5175, "nll_loss": 0.5004668235778809, "rewards/accuracies": 1.0, "rewards/chosen": -0.14924094080924988, "rewards/margins": 0.36440467834472656, "rewards/rejected": -0.513645589351654, "step": 5479 }, { "epoch": 15.003422313483915, "grad_norm": 10.671453475952148, "learning_rate": 2.493150684931507e-07, "log_odds_chosen": 1.6620500087738037, "log_odds_ratio": -0.6619030237197876, "logits/chosen": 0.9515091180801392, "logits/rejected": 0.9548001885414124, "logps/chosen": -2.5743398666381836, "logps/rejected": -4.039381980895996, "loss": 0.5608, "nll_loss": 0.49459314346313477, "rewards/accuracies": 0.625, "rewards/chosen": -0.25743401050567627, "rewards/margins": 0.1465042382478714, "rewards/rejected": -0.4039382040500641, "step": 5480 }, { "epoch": 15.006160164271048, "grad_norm": 3.254754066467285, "learning_rate": 2.491780821917808e-07, "log_odds_chosen": 4.678740978240967, "log_odds_ratio": -0.06853796541690826, "logits/chosen": 1.1037518978118896, "logits/rejected": 1.1251757144927979, "logps/chosen": -1.8344838619232178, "logps/rejected": -6.299726963043213, "loss": 0.6236, "nll_loss": 0.6167554259300232, "rewards/accuracies": 1.0, "rewards/chosen": -0.1834484040737152, "rewards/margins": 0.4465242922306061, "rewards/rejected": -0.6299726963043213, "step": 5481 }, { "epoch": 15.008898015058179, "grad_norm": 3.3776347637176514, "learning_rate": 2.4904109589041095e-07, "log_odds_chosen": 3.242269515991211, "log_odds_ratio": -0.2729349136352539, "logits/chosen": 1.02083158493042, "logits/rejected": 0.9655463695526123, "logps/chosen": -1.8506742715835571, "logps/rejected": -4.959582328796387, "loss": 0.5508, "nll_loss": 0.5235053300857544, "rewards/accuracies": 0.875, "rewards/chosen": -0.18506741523742676, "rewards/margins": 0.3108908236026764, "rewards/rejected": -0.49595826864242554, "step": 5482 }, { "epoch": 15.011635865845312, "grad_norm": 3.5787100791931152, "learning_rate": 2.489041095890411e-07, "log_odds_chosen": 3.327122449874878, "log_odds_ratio": -0.08354489505290985, "logits/chosen": 1.1283738613128662, "logits/rejected": 1.1269598007202148, "logps/chosen": -1.6728224754333496, "logps/rejected": -4.727313041687012, "loss": 0.506, "nll_loss": 0.49760007858276367, "rewards/accuracies": 1.0, "rewards/chosen": -0.16728225350379944, "rewards/margins": 0.3054490387439728, "rewards/rejected": -0.4727312922477722, "step": 5483 }, { "epoch": 15.014373716632443, "grad_norm": 5.4420952796936035, "learning_rate": 2.487671232876712e-07, "log_odds_chosen": 3.3116416931152344, "log_odds_ratio": -0.18194358050823212, "logits/chosen": 1.2452670335769653, "logits/rejected": 1.337604284286499, "logps/chosen": -2.030658483505249, "logps/rejected": -5.085233688354492, "loss": 0.5266, "nll_loss": 0.5083948969841003, "rewards/accuracies": 1.0, "rewards/chosen": -0.20306584239006042, "rewards/margins": 0.30545753240585327, "rewards/rejected": -0.5085234045982361, "step": 5484 }, { "epoch": 15.017111567419576, "grad_norm": 3.3879354000091553, "learning_rate": 2.4863013698630135e-07, "log_odds_chosen": 4.440232276916504, "log_odds_ratio": -0.08354809135198593, "logits/chosen": 0.9998359084129333, "logits/rejected": 1.0371687412261963, "logps/chosen": -1.2864431142807007, "logps/rejected": -5.211915969848633, "loss": 0.6006, "nll_loss": 0.5922456383705139, "rewards/accuracies": 1.0, "rewards/chosen": -0.12864428758621216, "rewards/margins": 0.3925473392009735, "rewards/rejected": -0.5211916565895081, "step": 5485 }, { "epoch": 15.019849418206707, "grad_norm": 13.312569618225098, "learning_rate": 2.484931506849315e-07, "log_odds_chosen": 2.1844961643218994, "log_odds_ratio": -0.44035473465919495, "logits/chosen": 1.0077896118164062, "logits/rejected": 1.0049723386764526, "logps/chosen": -2.4884049892425537, "logps/rejected": -4.5759711265563965, "loss": 0.8098, "nll_loss": 0.7658141851425171, "rewards/accuracies": 0.875, "rewards/chosen": -0.24884048104286194, "rewards/margins": 0.20875664055347443, "rewards/rejected": -0.45759713649749756, "step": 5486 }, { "epoch": 15.02258726899384, "grad_norm": 4.717682361602783, "learning_rate": 2.4835616438356165e-07, "log_odds_chosen": 1.5126081705093384, "log_odds_ratio": -0.34941908717155457, "logits/chosen": 0.9018450975418091, "logits/rejected": 0.8848966956138611, "logps/chosen": -2.184342622756958, "logps/rejected": -3.617587089538574, "loss": 0.5877, "nll_loss": 0.5527620315551758, "rewards/accuracies": 0.875, "rewards/chosen": -0.21843427419662476, "rewards/margins": 0.14332446455955505, "rewards/rejected": -0.3617587089538574, "step": 5487 }, { "epoch": 15.025325119780971, "grad_norm": 4.321363925933838, "learning_rate": 2.482191780821918e-07, "log_odds_chosen": 1.8068023920059204, "log_odds_ratio": -0.3065309226512909, "logits/chosen": 0.911798894405365, "logits/rejected": 0.9014672040939331, "logps/chosen": -1.7460567951202393, "logps/rejected": -3.43369722366333, "loss": 0.4535, "nll_loss": 0.4228617250919342, "rewards/accuracies": 0.875, "rewards/chosen": -0.17460568249225616, "rewards/margins": 0.16876405477523804, "rewards/rejected": -0.343369722366333, "step": 5488 }, { "epoch": 15.028062970568104, "grad_norm": 3.9996376037597656, "learning_rate": 2.480821917808219e-07, "log_odds_chosen": 1.8145523071289062, "log_odds_ratio": -0.3199685215950012, "logits/chosen": 1.1123415231704712, "logits/rejected": 1.0354185104370117, "logps/chosen": -1.2701247930526733, "logps/rejected": -2.8992793560028076, "loss": 0.539, "nll_loss": 0.5069900751113892, "rewards/accuracies": 1.0, "rewards/chosen": -0.1270124912261963, "rewards/margins": 0.1629154533147812, "rewards/rejected": -0.2899279296398163, "step": 5489 }, { "epoch": 15.030800821355236, "grad_norm": 2.997950315475464, "learning_rate": 2.4794520547945206e-07, "log_odds_chosen": 5.1309709548950195, "log_odds_ratio": -0.09799310564994812, "logits/chosen": 0.9371665716171265, "logits/rejected": 0.8677127361297607, "logps/chosen": -1.146467685699463, "logps/rejected": -5.838678359985352, "loss": 0.4639, "nll_loss": 0.4541306793689728, "rewards/accuracies": 1.0, "rewards/chosen": -0.11464677006006241, "rewards/margins": 0.4692211151123047, "rewards/rejected": -0.5838678479194641, "step": 5490 }, { "epoch": 15.033538672142368, "grad_norm": 4.013827323913574, "learning_rate": 2.4780821917808216e-07, "log_odds_chosen": 3.1043715476989746, "log_odds_ratio": -0.18324464559555054, "logits/chosen": 0.9814595580101013, "logits/rejected": 0.9900956749916077, "logps/chosen": -1.967261552810669, "logps/rejected": -4.808078765869141, "loss": 0.5592, "nll_loss": 0.5409079790115356, "rewards/accuracies": 1.0, "rewards/chosen": -0.19672614336013794, "rewards/margins": 0.28408172726631165, "rewards/rejected": -0.4808078706264496, "step": 5491 }, { "epoch": 15.0362765229295, "grad_norm": 3.559408187866211, "learning_rate": 2.476712328767123e-07, "log_odds_chosen": 3.459392547607422, "log_odds_ratio": -0.14661695063114166, "logits/chosen": 0.9963983297348022, "logits/rejected": 1.021824598312378, "logps/chosen": -1.8137078285217285, "logps/rejected": -5.02742862701416, "loss": 0.5826, "nll_loss": 0.5679153800010681, "rewards/accuracies": 1.0, "rewards/chosen": -0.1813707947731018, "rewards/margins": 0.32137206196784973, "rewards/rejected": -0.5027428865432739, "step": 5492 }, { "epoch": 15.039014373716633, "grad_norm": 4.916147708892822, "learning_rate": 2.4753424657534246e-07, "log_odds_chosen": 1.6131768226623535, "log_odds_ratio": -0.45648062229156494, "logits/chosen": 1.225809097290039, "logits/rejected": 1.289315938949585, "logps/chosen": -2.083735942840576, "logps/rejected": -3.5851247310638428, "loss": 0.5501, "nll_loss": 0.5044986009597778, "rewards/accuracies": 0.875, "rewards/chosen": -0.20837360620498657, "rewards/margins": 0.15013885498046875, "rewards/rejected": -0.3585124611854553, "step": 5493 }, { "epoch": 15.041752224503764, "grad_norm": 3.464388370513916, "learning_rate": 2.473972602739726e-07, "log_odds_chosen": 2.1429030895233154, "log_odds_ratio": -0.20316725969314575, "logits/chosen": 0.8230481147766113, "logits/rejected": 0.8234238028526306, "logps/chosen": -1.5769904851913452, "logps/rejected": -3.490339756011963, "loss": 0.4642, "nll_loss": 0.4439294934272766, "rewards/accuracies": 1.0, "rewards/chosen": -0.15769906342029572, "rewards/margins": 0.19133494794368744, "rewards/rejected": -0.34903398156166077, "step": 5494 }, { "epoch": 15.044490075290897, "grad_norm": 3.422111749649048, "learning_rate": 2.4726027397260277e-07, "log_odds_chosen": 1.975053310394287, "log_odds_ratio": -0.2096782773733139, "logits/chosen": 1.1687395572662354, "logits/rejected": 1.157954454421997, "logps/chosen": -1.4598684310913086, "logps/rejected": -3.242694854736328, "loss": 0.424, "nll_loss": 0.4029998779296875, "rewards/accuracies": 1.0, "rewards/chosen": -0.14598685503005981, "rewards/margins": 0.17828264832496643, "rewards/rejected": -0.32426950335502625, "step": 5495 }, { "epoch": 15.047227926078028, "grad_norm": 3.9068026542663574, "learning_rate": 2.4712328767123286e-07, "log_odds_chosen": 2.7852394580841064, "log_odds_ratio": -0.1629686951637268, "logits/chosen": 1.1286028623580933, "logits/rejected": 1.209294319152832, "logps/chosen": -2.432919502258301, "logps/rejected": -5.057382106781006, "loss": 0.6138, "nll_loss": 0.5975047945976257, "rewards/accuracies": 1.0, "rewards/chosen": -0.2432919293642044, "rewards/margins": 0.2624462842941284, "rewards/rejected": -0.5057381987571716, "step": 5496 }, { "epoch": 15.049965776865161, "grad_norm": 4.593909740447998, "learning_rate": 2.46986301369863e-07, "log_odds_chosen": 1.8067151308059692, "log_odds_ratio": -0.20869937539100647, "logits/chosen": 1.46912682056427, "logits/rejected": 1.4427096843719482, "logps/chosen": -1.1942791938781738, "logps/rejected": -2.7288055419921875, "loss": 0.4314, "nll_loss": 0.41057875752449036, "rewards/accuracies": 1.0, "rewards/chosen": -0.11942791938781738, "rewards/margins": 0.15345263481140137, "rewards/rejected": -0.27288055419921875, "step": 5497 }, { "epoch": 15.052703627652292, "grad_norm": 4.471858978271484, "learning_rate": 2.468493150684931e-07, "log_odds_chosen": 2.7525994777679443, "log_odds_ratio": -0.19279798865318298, "logits/chosen": 0.891139030456543, "logits/rejected": 0.7582206130027771, "logps/chosen": -1.7423298358917236, "logps/rejected": -4.329919815063477, "loss": 0.6901, "nll_loss": 0.6708492040634155, "rewards/accuracies": 1.0, "rewards/chosen": -0.17423298954963684, "rewards/margins": 0.2587589621543884, "rewards/rejected": -0.43299198150634766, "step": 5498 }, { "epoch": 15.055441478439425, "grad_norm": 3.807091474533081, "learning_rate": 2.4671232876712327e-07, "log_odds_chosen": 3.0488479137420654, "log_odds_ratio": -0.13116027414798737, "logits/chosen": 1.0415520668029785, "logits/rejected": 1.0639814138412476, "logps/chosen": -1.8384063243865967, "logps/rejected": -4.687633991241455, "loss": 0.6295, "nll_loss": 0.6163901686668396, "rewards/accuracies": 1.0, "rewards/chosen": -0.18384064733982086, "rewards/margins": 0.28492271900177, "rewards/rejected": -0.46876341104507446, "step": 5499 }, { "epoch": 15.058179329226558, "grad_norm": 3.730567693710327, "learning_rate": 2.465753424657534e-07, "log_odds_chosen": 4.643970489501953, "log_odds_ratio": -0.10778649896383286, "logits/chosen": 0.7771070599555969, "logits/rejected": 0.7246717214584351, "logps/chosen": -1.9894371032714844, "logps/rejected": -6.432744026184082, "loss": 0.5916, "nll_loss": 0.5807944536209106, "rewards/accuracies": 1.0, "rewards/chosen": -0.19894373416900635, "rewards/margins": 0.4443306624889374, "rewards/rejected": -0.6432743668556213, "step": 5500 }, { "epoch": 15.06091718001369, "grad_norm": 4.337676525115967, "learning_rate": 2.4643835616438357e-07, "log_odds_chosen": 5.0038251876831055, "log_odds_ratio": -0.23094430565834045, "logits/chosen": 1.0608370304107666, "logits/rejected": 1.056877613067627, "logps/chosen": -1.866362452507019, "logps/rejected": -6.613899230957031, "loss": 0.5851, "nll_loss": 0.562034547328949, "rewards/accuracies": 0.875, "rewards/chosen": -0.18663625419139862, "rewards/margins": 0.4747536778450012, "rewards/rejected": -0.661389946937561, "step": 5501 }, { "epoch": 15.063655030800822, "grad_norm": 3.9383461475372314, "learning_rate": 2.4630136986301367e-07, "log_odds_chosen": 2.186335802078247, "log_odds_ratio": -0.22076353430747986, "logits/chosen": 1.006609320640564, "logits/rejected": 0.9811497330665588, "logps/chosen": -1.6063306331634521, "logps/rejected": -3.478952407836914, "loss": 0.53, "nll_loss": 0.5079163312911987, "rewards/accuracies": 1.0, "rewards/chosen": -0.16063307225704193, "rewards/margins": 0.1872621476650238, "rewards/rejected": -0.34789520502090454, "step": 5502 }, { "epoch": 15.066392881587953, "grad_norm": 3.7970056533813477, "learning_rate": 2.461643835616438e-07, "log_odds_chosen": 4.929172515869141, "log_odds_ratio": -0.06177469342947006, "logits/chosen": 1.296244740486145, "logits/rejected": 1.3368796110153198, "logps/chosen": -2.3724770545959473, "logps/rejected": -7.131501197814941, "loss": 0.615, "nll_loss": 0.6088175773620605, "rewards/accuracies": 1.0, "rewards/chosen": -0.23724767565727234, "rewards/margins": 0.4759024381637573, "rewards/rejected": -0.713150143623352, "step": 5503 }, { "epoch": 15.069130732375086, "grad_norm": 3.6084916591644287, "learning_rate": 2.46027397260274e-07, "log_odds_chosen": 3.5037899017333984, "log_odds_ratio": -0.17738212645053864, "logits/chosen": 1.3452792167663574, "logits/rejected": 1.230299711227417, "logps/chosen": -1.7979645729064941, "logps/rejected": -5.141146183013916, "loss": 0.4892, "nll_loss": 0.47145965695381165, "rewards/accuracies": 1.0, "rewards/chosen": -0.17979645729064941, "rewards/margins": 0.3343181908130646, "rewards/rejected": -0.5141146779060364, "step": 5504 }, { "epoch": 15.071868583162217, "grad_norm": 4.76975679397583, "learning_rate": 2.458904109589041e-07, "log_odds_chosen": 2.682389736175537, "log_odds_ratio": -0.22044694423675537, "logits/chosen": 1.5319874286651611, "logits/rejected": 1.568682074546814, "logps/chosen": -2.684319496154785, "logps/rejected": -5.2122602462768555, "loss": 0.5617, "nll_loss": 0.5396789312362671, "rewards/accuracies": 0.875, "rewards/chosen": -0.26843196153640747, "rewards/margins": 0.252794086933136, "rewards/rejected": -0.5212260484695435, "step": 5505 }, { "epoch": 15.07460643394935, "grad_norm": 3.0805840492248535, "learning_rate": 2.4575342465753423e-07, "log_odds_chosen": 4.33650541305542, "log_odds_ratio": -0.09406521171331406, "logits/chosen": 0.8535209894180298, "logits/rejected": 0.8362705707550049, "logps/chosen": -1.1231486797332764, "logps/rejected": -5.017861366271973, "loss": 0.4817, "nll_loss": 0.4722521901130676, "rewards/accuracies": 1.0, "rewards/chosen": -0.1123148649930954, "rewards/margins": 0.38947126269340515, "rewards/rejected": -0.5017861723899841, "step": 5506 }, { "epoch": 15.077344284736482, "grad_norm": 7.24016809463501, "learning_rate": 2.456164383561644e-07, "log_odds_chosen": 1.2641618251800537, "log_odds_ratio": -0.40015876293182373, "logits/chosen": 1.1803480386734009, "logits/rejected": 1.1840989589691162, "logps/chosen": -2.2860066890716553, "logps/rejected": -3.4023075103759766, "loss": 0.5625, "nll_loss": 0.522487461566925, "rewards/accuracies": 0.875, "rewards/chosen": -0.22860068082809448, "rewards/margins": 0.11163006722927094, "rewards/rejected": -0.3402307629585266, "step": 5507 }, { "epoch": 15.080082135523615, "grad_norm": 6.115738868713379, "learning_rate": 2.4547945205479453e-07, "log_odds_chosen": 5.13742208480835, "log_odds_ratio": -0.05603471025824547, "logits/chosen": 1.0900474786758423, "logits/rejected": 1.1315840482711792, "logps/chosen": -2.591709613800049, "logps/rejected": -7.497159481048584, "loss": 0.652, "nll_loss": 0.646415114402771, "rewards/accuracies": 1.0, "rewards/chosen": -0.2591709494590759, "rewards/margins": 0.4905450642108917, "rewards/rejected": -0.74971604347229, "step": 5508 }, { "epoch": 15.082819986310746, "grad_norm": 6.693925380706787, "learning_rate": 2.4534246575342463e-07, "log_odds_chosen": 1.9216796159744263, "log_odds_ratio": -0.23886160552501678, "logits/chosen": 1.2611631155014038, "logits/rejected": 1.10890793800354, "logps/chosen": -2.2708301544189453, "logps/rejected": -4.035909652709961, "loss": 0.516, "nll_loss": 0.49211204051971436, "rewards/accuracies": 1.0, "rewards/chosen": -0.2270830273628235, "rewards/margins": 0.17650796473026276, "rewards/rejected": -0.40359097719192505, "step": 5509 }, { "epoch": 15.085557837097879, "grad_norm": 6.962650299072266, "learning_rate": 2.452054794520548e-07, "log_odds_chosen": 3.7420308589935303, "log_odds_ratio": -0.10538528859615326, "logits/chosen": 1.0281968116760254, "logits/rejected": 1.0309921503067017, "logps/chosen": -2.0440690517425537, "logps/rejected": -5.607619762420654, "loss": 0.5345, "nll_loss": 0.5239882469177246, "rewards/accuracies": 1.0, "rewards/chosen": -0.20440691709518433, "rewards/margins": 0.35635513067245483, "rewards/rejected": -0.5607620477676392, "step": 5510 }, { "epoch": 15.08829568788501, "grad_norm": 3.3006656169891357, "learning_rate": 2.4506849315068493e-07, "log_odds_chosen": 2.2685585021972656, "log_odds_ratio": -0.20356342196464539, "logits/chosen": 1.094212532043457, "logits/rejected": 1.0210689306259155, "logps/chosen": -1.959524393081665, "logps/rejected": -4.0750813484191895, "loss": 0.4801, "nll_loss": 0.459783136844635, "rewards/accuracies": 1.0, "rewards/chosen": -0.19595246016979218, "rewards/margins": 0.21155571937561035, "rewards/rejected": -0.40750813484191895, "step": 5511 }, { "epoch": 15.091033538672143, "grad_norm": 3.725560188293457, "learning_rate": 2.4493150684931503e-07, "log_odds_chosen": 3.903257369995117, "log_odds_ratio": -0.13935121893882751, "logits/chosen": 1.0410807132720947, "logits/rejected": 1.0107011795043945, "logps/chosen": -1.1653963327407837, "logps/rejected": -4.619723320007324, "loss": 0.4578, "nll_loss": 0.44387903809547424, "rewards/accuracies": 1.0, "rewards/chosen": -0.11653962731361389, "rewards/margins": 0.34543272852897644, "rewards/rejected": -0.46197232604026794, "step": 5512 }, { "epoch": 15.093771389459274, "grad_norm": 3.5494673252105713, "learning_rate": 2.447945205479452e-07, "log_odds_chosen": 2.9999992847442627, "log_odds_ratio": -0.12827692925930023, "logits/chosen": 0.990036129951477, "logits/rejected": 0.9858372211456299, "logps/chosen": -2.1680188179016113, "logps/rejected": -5.0123677253723145, "loss": 0.4857, "nll_loss": 0.4728948473930359, "rewards/accuracies": 1.0, "rewards/chosen": -0.21680188179016113, "rewards/margins": 0.28443488478660583, "rewards/rejected": -0.5012367963790894, "step": 5513 }, { "epoch": 15.096509240246407, "grad_norm": 4.201300621032715, "learning_rate": 2.4465753424657534e-07, "log_odds_chosen": 2.8540658950805664, "log_odds_ratio": -0.192535862326622, "logits/chosen": 1.0252677202224731, "logits/rejected": 0.9520072340965271, "logps/chosen": -1.132775068283081, "logps/rejected": -3.6481025218963623, "loss": 0.4544, "nll_loss": 0.43515443801879883, "rewards/accuracies": 1.0, "rewards/chosen": -0.11327750980854034, "rewards/margins": 0.25153273344039917, "rewards/rejected": -0.3648102581501007, "step": 5514 }, { "epoch": 15.099247091033538, "grad_norm": 3.253803014755249, "learning_rate": 2.445205479452055e-07, "log_odds_chosen": 5.751570701599121, "log_odds_ratio": -0.04635988548398018, "logits/chosen": 1.077967643737793, "logits/rejected": 1.0514566898345947, "logps/chosen": -1.9923195838928223, "logps/rejected": -7.510142803192139, "loss": 0.5338, "nll_loss": 0.5291860699653625, "rewards/accuracies": 1.0, "rewards/chosen": -0.19923196732997894, "rewards/margins": 0.5517823100090027, "rewards/rejected": -0.7510142922401428, "step": 5515 }, { "epoch": 15.101984941820671, "grad_norm": 4.1313276290893555, "learning_rate": 2.443835616438356e-07, "log_odds_chosen": 3.1861557960510254, "log_odds_ratio": -0.239390030503273, "logits/chosen": 1.1179038286209106, "logits/rejected": 1.1324262619018555, "logps/chosen": -2.188754081726074, "logps/rejected": -5.2435302734375, "loss": 0.653, "nll_loss": 0.6290367841720581, "rewards/accuracies": 0.875, "rewards/chosen": -0.21887540817260742, "rewards/margins": 0.30547767877578735, "rewards/rejected": -0.52435302734375, "step": 5516 }, { "epoch": 15.104722792607802, "grad_norm": 6.614169120788574, "learning_rate": 2.4424657534246574e-07, "log_odds_chosen": 1.7995109558105469, "log_odds_ratio": -0.3839220106601715, "logits/chosen": 1.2861354351043701, "logits/rejected": 1.291222333908081, "logps/chosen": -2.826785087585449, "logps/rejected": -4.499734401702881, "loss": 0.636, "nll_loss": 0.5976088047027588, "rewards/accuracies": 0.75, "rewards/chosen": -0.2826785445213318, "rewards/margins": 0.1672949492931366, "rewards/rejected": -0.449973464012146, "step": 5517 }, { "epoch": 15.107460643394935, "grad_norm": 6.160506248474121, "learning_rate": 2.441095890410959e-07, "log_odds_chosen": 3.1681060791015625, "log_odds_ratio": -0.18763470649719238, "logits/chosen": 0.9902694821357727, "logits/rejected": 0.88265061378479, "logps/chosen": -1.627329707145691, "logps/rejected": -4.592703342437744, "loss": 0.5399, "nll_loss": 0.5211628675460815, "rewards/accuracies": 1.0, "rewards/chosen": -0.16273298859596252, "rewards/margins": 0.2965373694896698, "rewards/rejected": -0.4592703580856323, "step": 5518 }, { "epoch": 15.110198494182066, "grad_norm": 4.188318729400635, "learning_rate": 2.4397260273972604e-07, "log_odds_chosen": 2.070051431655884, "log_odds_ratio": -0.1823795884847641, "logits/chosen": 0.869111180305481, "logits/rejected": 0.8330396413803101, "logps/chosen": -1.9794831275939941, "logps/rejected": -3.8800127506256104, "loss": 0.5353, "nll_loss": 0.5170198678970337, "rewards/accuracies": 1.0, "rewards/chosen": -0.19794832170009613, "rewards/margins": 0.19005294144153595, "rewards/rejected": -0.3880012631416321, "step": 5519 }, { "epoch": 15.1129363449692, "grad_norm": 3.380342721939087, "learning_rate": 2.4383561643835614e-07, "log_odds_chosen": 2.822228193283081, "log_odds_ratio": -0.18470272421836853, "logits/chosen": 1.0071616172790527, "logits/rejected": 1.0492298603057861, "logps/chosen": -1.685215711593628, "logps/rejected": -4.279614448547363, "loss": 0.5242, "nll_loss": 0.5057350993156433, "rewards/accuracies": 0.875, "rewards/chosen": -0.16852158308029175, "rewards/margins": 0.2594399154186249, "rewards/rejected": -0.427961528301239, "step": 5520 }, { "epoch": 15.11567419575633, "grad_norm": 3.4291372299194336, "learning_rate": 2.436986301369863e-07, "log_odds_chosen": 2.7601852416992188, "log_odds_ratio": -0.12602116167545319, "logits/chosen": 1.3065977096557617, "logits/rejected": 1.2624579668045044, "logps/chosen": -1.4613888263702393, "logps/rejected": -3.967437982559204, "loss": 0.472, "nll_loss": 0.4594356119632721, "rewards/accuracies": 1.0, "rewards/chosen": -0.14613887667655945, "rewards/margins": 0.25060492753982544, "rewards/rejected": -0.3967437744140625, "step": 5521 }, { "epoch": 15.118412046543463, "grad_norm": 3.8539702892303467, "learning_rate": 2.435616438356164e-07, "log_odds_chosen": 1.1458661556243896, "log_odds_ratio": -0.30349960923194885, "logits/chosen": 1.3247637748718262, "logits/rejected": 1.17913818359375, "logps/chosen": -1.3841564655303955, "logps/rejected": -2.3433961868286133, "loss": 0.4777, "nll_loss": 0.4473218321800232, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384156495332718, "rewards/margins": 0.09592396020889282, "rewards/rejected": -0.2343396097421646, "step": 5522 }, { "epoch": 15.121149897330595, "grad_norm": 9.493278503417969, "learning_rate": 2.4342465753424655e-07, "log_odds_chosen": 4.68315315246582, "log_odds_ratio": -0.1906980276107788, "logits/chosen": 0.9950039386749268, "logits/rejected": 0.9923189282417297, "logps/chosen": -1.6242084503173828, "logps/rejected": -6.034921169281006, "loss": 0.5746, "nll_loss": 0.5555132627487183, "rewards/accuracies": 0.875, "rewards/chosen": -0.1624208390712738, "rewards/margins": 0.4410713016986847, "rewards/rejected": -0.6034921407699585, "step": 5523 }, { "epoch": 15.123887748117728, "grad_norm": 3.0503876209259033, "learning_rate": 2.432876712328767e-07, "log_odds_chosen": 2.5423479080200195, "log_odds_ratio": -0.11797328293323517, "logits/chosen": 1.3444137573242188, "logits/rejected": 1.3060498237609863, "logps/chosen": -1.882964849472046, "logps/rejected": -4.2410688400268555, "loss": 0.555, "nll_loss": 0.543225884437561, "rewards/accuracies": 1.0, "rewards/chosen": -0.18829646706581116, "rewards/margins": 0.23581039905548096, "rewards/rejected": -0.4241068959236145, "step": 5524 }, { "epoch": 15.126625598904859, "grad_norm": 3.9116649627685547, "learning_rate": 2.4315068493150685e-07, "log_odds_chosen": 3.065293788909912, "log_odds_ratio": -0.10864846408367157, "logits/chosen": 1.12570321559906, "logits/rejected": 1.140329122543335, "logps/chosen": -1.5470893383026123, "logps/rejected": -4.323479652404785, "loss": 0.4681, "nll_loss": 0.4572044014930725, "rewards/accuracies": 1.0, "rewards/chosen": -0.15470892190933228, "rewards/margins": 0.2776390314102173, "rewards/rejected": -0.43234795331954956, "step": 5525 }, { "epoch": 15.129363449691992, "grad_norm": 3.845553159713745, "learning_rate": 2.43013698630137e-07, "log_odds_chosen": 2.0122575759887695, "log_odds_ratio": -0.22727571427822113, "logits/chosen": 1.1983379125595093, "logits/rejected": 1.210551142692566, "logps/chosen": -1.709472417831421, "logps/rejected": -3.519566774368286, "loss": 0.434, "nll_loss": 0.41127580404281616, "rewards/accuracies": 0.875, "rewards/chosen": -0.17094725370407104, "rewards/margins": 0.181009441614151, "rewards/rejected": -0.35195666551589966, "step": 5526 }, { "epoch": 15.132101300479125, "grad_norm": 3.315098285675049, "learning_rate": 2.428767123287671e-07, "log_odds_chosen": 4.636957168579102, "log_odds_ratio": -0.11131773889064789, "logits/chosen": 1.2712759971618652, "logits/rejected": 1.3070447444915771, "logps/chosen": -2.0386009216308594, "logps/rejected": -6.463138103485107, "loss": 0.6397, "nll_loss": 0.6285358667373657, "rewards/accuracies": 1.0, "rewards/chosen": -0.2038601189851761, "rewards/margins": 0.44245368242263794, "rewards/rejected": -0.6463138461112976, "step": 5527 }, { "epoch": 15.134839151266256, "grad_norm": 3.6118483543395996, "learning_rate": 2.4273972602739725e-07, "log_odds_chosen": 3.3948607444763184, "log_odds_ratio": -0.2393399477005005, "logits/chosen": 1.55260169506073, "logits/rejected": 1.4474533796310425, "logps/chosen": -1.3805122375488281, "logps/rejected": -4.566128730773926, "loss": 0.4859, "nll_loss": 0.46197837591171265, "rewards/accuracies": 0.875, "rewards/chosen": -0.13805124163627625, "rewards/margins": 0.3185616731643677, "rewards/rejected": -0.45661288499832153, "step": 5528 }, { "epoch": 15.137577002053389, "grad_norm": 7.017282009124756, "learning_rate": 2.4260273972602735e-07, "log_odds_chosen": 3.2456562519073486, "log_odds_ratio": -0.2592444121837616, "logits/chosen": 1.050084114074707, "logits/rejected": 1.0430251359939575, "logps/chosen": -1.9074243307113647, "logps/rejected": -4.967665195465088, "loss": 0.6412, "nll_loss": 0.6152296662330627, "rewards/accuracies": 0.875, "rewards/chosen": -0.19074244797229767, "rewards/margins": 0.30602410435676575, "rewards/rejected": -0.4967665672302246, "step": 5529 }, { "epoch": 15.14031485284052, "grad_norm": 4.841158866882324, "learning_rate": 2.424657534246575e-07, "log_odds_chosen": 3.1373441219329834, "log_odds_ratio": -0.23010116815567017, "logits/chosen": 1.2035589218139648, "logits/rejected": 1.1917552947998047, "logps/chosen": -1.9525004625320435, "logps/rejected": -4.934406280517578, "loss": 0.588, "nll_loss": 0.5649642944335938, "rewards/accuracies": 0.875, "rewards/chosen": -0.19525004923343658, "rewards/margins": 0.2981905937194824, "rewards/rejected": -0.4934406280517578, "step": 5530 }, { "epoch": 15.143052703627653, "grad_norm": 8.382161140441895, "learning_rate": 2.4232876712328766e-07, "log_odds_chosen": 1.1245262622833252, "log_odds_ratio": -0.9724413156509399, "logits/chosen": 1.058269739151001, "logits/rejected": 1.0398024320602417, "logps/chosen": -2.9408960342407227, "logps/rejected": -3.9251561164855957, "loss": 0.5944, "nll_loss": 0.49712520837783813, "rewards/accuracies": 0.75, "rewards/chosen": -0.2940896451473236, "rewards/margins": 0.09842600673437119, "rewards/rejected": -0.392515629529953, "step": 5531 }, { "epoch": 15.145790554414784, "grad_norm": 3.9701027870178223, "learning_rate": 2.421917808219178e-07, "log_odds_chosen": 4.0071210861206055, "log_odds_ratio": -0.08839438855648041, "logits/chosen": 1.2408931255340576, "logits/rejected": 1.2794668674468994, "logps/chosen": -1.930006980895996, "logps/rejected": -5.7140302658081055, "loss": 0.5004, "nll_loss": 0.4915910065174103, "rewards/accuracies": 1.0, "rewards/chosen": -0.1930007040500641, "rewards/margins": 0.37840238213539124, "rewards/rejected": -0.5714030265808105, "step": 5532 }, { "epoch": 15.148528405201917, "grad_norm": 3.6005301475524902, "learning_rate": 2.4205479452054796e-07, "log_odds_chosen": 1.5596808195114136, "log_odds_ratio": -0.26888370513916016, "logits/chosen": 1.1551802158355713, "logits/rejected": 1.1683177947998047, "logps/chosen": -1.6065127849578857, "logps/rejected": -3.0028531551361084, "loss": 0.5259, "nll_loss": 0.49903416633605957, "rewards/accuracies": 1.0, "rewards/chosen": -0.16065126657485962, "rewards/margins": 0.13963404297828674, "rewards/rejected": -0.30028533935546875, "step": 5533 }, { "epoch": 15.151266255989048, "grad_norm": 3.5058600902557373, "learning_rate": 2.4191780821917806e-07, "log_odds_chosen": 4.076996326446533, "log_odds_ratio": -0.13297316431999207, "logits/chosen": 1.0934809446334839, "logits/rejected": 1.0730314254760742, "logps/chosen": -1.4089373350143433, "logps/rejected": -5.191845893859863, "loss": 0.5939, "nll_loss": 0.5806121826171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.14089372754096985, "rewards/margins": 0.3782908618450165, "rewards/rejected": -0.5191845893859863, "step": 5534 }, { "epoch": 15.154004106776181, "grad_norm": 3.8635241985321045, "learning_rate": 2.417808219178082e-07, "log_odds_chosen": 2.9644365310668945, "log_odds_ratio": -0.09025095403194427, "logits/chosen": 1.1749985218048096, "logits/rejected": 1.2040070295333862, "logps/chosen": -1.4829535484313965, "logps/rejected": -4.154335975646973, "loss": 0.4972, "nll_loss": 0.4881310760974884, "rewards/accuracies": 1.0, "rewards/chosen": -0.1482953429222107, "rewards/margins": 0.26713821291923523, "rewards/rejected": -0.4154335856437683, "step": 5535 }, { "epoch": 15.156741957563312, "grad_norm": 3.3980438709259033, "learning_rate": 2.416438356164383e-07, "log_odds_chosen": 2.814753532409668, "log_odds_ratio": -0.199440598487854, "logits/chosen": 1.1650056838989258, "logits/rejected": 1.062783122062683, "logps/chosen": -1.596034288406372, "logps/rejected": -4.216450214385986, "loss": 0.5559, "nll_loss": 0.5359966158866882, "rewards/accuracies": 1.0, "rewards/chosen": -0.15960343182086945, "rewards/margins": 0.2620415985584259, "rewards/rejected": -0.42164501547813416, "step": 5536 }, { "epoch": 15.159479808350445, "grad_norm": 7.275078296661377, "learning_rate": 2.4150684931506846e-07, "log_odds_chosen": 0.9351863265037537, "log_odds_ratio": -0.42660290002822876, "logits/chosen": 1.1239280700683594, "logits/rejected": 1.0235686302185059, "logps/chosen": -3.126452922821045, "logps/rejected": -3.920426845550537, "loss": 0.5835, "nll_loss": 0.5408231019973755, "rewards/accuracies": 0.875, "rewards/chosen": -0.31264528632164, "rewards/margins": 0.07939739525318146, "rewards/rejected": -0.3920426666736603, "step": 5537 }, { "epoch": 15.162217659137577, "grad_norm": 3.9132306575775146, "learning_rate": 2.413698630136986e-07, "log_odds_chosen": 2.260406255722046, "log_odds_ratio": -0.31051018834114075, "logits/chosen": 1.0853782892227173, "logits/rejected": 1.1130715608596802, "logps/chosen": -1.8278298377990723, "logps/rejected": -3.9406681060791016, "loss": 0.5009, "nll_loss": 0.46989360451698303, "rewards/accuracies": 0.75, "rewards/chosen": -0.18278300762176514, "rewards/margins": 0.2112838625907898, "rewards/rejected": -0.39406684041023254, "step": 5538 }, { "epoch": 15.16495550992471, "grad_norm": 9.710333824157715, "learning_rate": 2.4123287671232877e-07, "log_odds_chosen": 1.751407504081726, "log_odds_ratio": -0.5826010704040527, "logits/chosen": 1.169670820236206, "logits/rejected": 1.213141679763794, "logps/chosen": -2.7796473503112793, "logps/rejected": -4.421699047088623, "loss": 0.6269, "nll_loss": 0.5686478614807129, "rewards/accuracies": 0.75, "rewards/chosen": -0.2779647409915924, "rewards/margins": 0.1642051637172699, "rewards/rejected": -0.4421698749065399, "step": 5539 }, { "epoch": 15.16769336071184, "grad_norm": 4.052856922149658, "learning_rate": 2.410958904109589e-07, "log_odds_chosen": 3.246002674102783, "log_odds_ratio": -0.22172428667545319, "logits/chosen": 1.1156389713287354, "logits/rejected": 1.1117477416992188, "logps/chosen": -1.5744404792785645, "logps/rejected": -4.548809051513672, "loss": 0.4253, "nll_loss": 0.40313658118247986, "rewards/accuracies": 1.0, "rewards/chosen": -0.1574440598487854, "rewards/margins": 0.2974368631839752, "rewards/rejected": -0.4548809230327606, "step": 5540 }, { "epoch": 15.170431211498974, "grad_norm": 6.62529993057251, "learning_rate": 2.40958904109589e-07, "log_odds_chosen": 2.658172607421875, "log_odds_ratio": -0.3396400213241577, "logits/chosen": 0.8288180232048035, "logits/rejected": 0.8061577081680298, "logps/chosen": -1.7855920791625977, "logps/rejected": -4.301034927368164, "loss": 0.5067, "nll_loss": 0.4727295935153961, "rewards/accuracies": 0.875, "rewards/chosen": -0.17855921387672424, "rewards/margins": 0.2515442371368408, "rewards/rejected": -0.43010345101356506, "step": 5541 }, { "epoch": 15.173169062286105, "grad_norm": 4.725543022155762, "learning_rate": 2.4082191780821917e-07, "log_odds_chosen": 2.3589611053466797, "log_odds_ratio": -0.28897595405578613, "logits/chosen": 1.2596396207809448, "logits/rejected": 1.2685635089874268, "logps/chosen": -2.5999560356140137, "logps/rejected": -4.860866069793701, "loss": 0.6746, "nll_loss": 0.6457102298736572, "rewards/accuracies": 0.875, "rewards/chosen": -0.25999563932418823, "rewards/margins": 0.2260909527540207, "rewards/rejected": -0.48608657717704773, "step": 5542 }, { "epoch": 15.175906913073238, "grad_norm": 8.243735313415527, "learning_rate": 2.4068493150684927e-07, "log_odds_chosen": 2.263491153717041, "log_odds_ratio": -0.8996241688728333, "logits/chosen": 1.0845293998718262, "logits/rejected": 1.0935337543487549, "logps/chosen": -3.041482448577881, "logps/rejected": -5.209447383880615, "loss": 0.7309, "nll_loss": 0.6408997178077698, "rewards/accuracies": 0.625, "rewards/chosen": -0.30414822697639465, "rewards/margins": 0.21679651737213135, "rewards/rejected": -0.5209447145462036, "step": 5543 }, { "epoch": 15.178644763860369, "grad_norm": 3.794186592102051, "learning_rate": 2.405479452054794e-07, "log_odds_chosen": 3.9904325008392334, "log_odds_ratio": -0.2664797306060791, "logits/chosen": 1.2257853746414185, "logits/rejected": 1.243360161781311, "logps/chosen": -2.1530346870422363, "logps/rejected": -6.022130012512207, "loss": 0.567, "nll_loss": 0.5403050780296326, "rewards/accuracies": 0.75, "rewards/chosen": -0.2153034508228302, "rewards/margins": 0.38690951466560364, "rewards/rejected": -0.6022129654884338, "step": 5544 }, { "epoch": 15.181382614647502, "grad_norm": 4.267179012298584, "learning_rate": 2.404109589041096e-07, "log_odds_chosen": 1.8264167308807373, "log_odds_ratio": -0.2593700885772705, "logits/chosen": 1.1710071563720703, "logits/rejected": 1.1748347282409668, "logps/chosen": -2.2629218101501465, "logps/rejected": -3.9350783824920654, "loss": 0.4959, "nll_loss": 0.4699609875679016, "rewards/accuracies": 1.0, "rewards/chosen": -0.2262921929359436, "rewards/margins": 0.16721564531326294, "rewards/rejected": -0.39350783824920654, "step": 5545 }, { "epoch": 15.184120465434633, "grad_norm": 4.31620979309082, "learning_rate": 2.4027397260273973e-07, "log_odds_chosen": 2.91087007522583, "log_odds_ratio": -0.2957744002342224, "logits/chosen": 1.031907558441162, "logits/rejected": 1.0542675256729126, "logps/chosen": -1.9068020582199097, "logps/rejected": -4.705322742462158, "loss": 0.5202, "nll_loss": 0.49064481258392334, "rewards/accuracies": 0.875, "rewards/chosen": -0.19068020582199097, "rewards/margins": 0.27985209226608276, "rewards/rejected": -0.47053229808807373, "step": 5546 }, { "epoch": 15.186858316221766, "grad_norm": 4.775933742523193, "learning_rate": 2.401369863013699e-07, "log_odds_chosen": 2.84078049659729, "log_odds_ratio": -0.259572297334671, "logits/chosen": 1.2798383235931396, "logits/rejected": 1.3238041400909424, "logps/chosen": -1.9728459119796753, "logps/rejected": -4.666849136352539, "loss": 0.5999, "nll_loss": 0.5739446878433228, "rewards/accuracies": 1.0, "rewards/chosen": -0.19728459417819977, "rewards/margins": 0.26940032839775085, "rewards/rejected": -0.4666849374771118, "step": 5547 }, { "epoch": 15.189596167008897, "grad_norm": 3.4696314334869385, "learning_rate": 2.4e-07, "log_odds_chosen": 2.6241402626037598, "log_odds_ratio": -0.18312306702136993, "logits/chosen": 1.1214594841003418, "logits/rejected": 1.0743379592895508, "logps/chosen": -1.6465201377868652, "logps/rejected": -4.096646308898926, "loss": 0.6015, "nll_loss": 0.5832080841064453, "rewards/accuracies": 1.0, "rewards/chosen": -0.164652019739151, "rewards/margins": 0.24501264095306396, "rewards/rejected": -0.40966469049453735, "step": 5548 }, { "epoch": 15.19233401779603, "grad_norm": 3.6615335941314697, "learning_rate": 2.3986301369863013e-07, "log_odds_chosen": 3.2370026111602783, "log_odds_ratio": -0.14898669719696045, "logits/chosen": 1.1783095598220825, "logits/rejected": 1.2302906513214111, "logps/chosen": -1.9069039821624756, "logps/rejected": -5.002442359924316, "loss": 0.4645, "nll_loss": 0.4495919942855835, "rewards/accuracies": 1.0, "rewards/chosen": -0.19069041311740875, "rewards/margins": 0.3095538318157196, "rewards/rejected": -0.5002442598342896, "step": 5549 }, { "epoch": 15.195071868583161, "grad_norm": 4.288813591003418, "learning_rate": 2.3972602739726023e-07, "log_odds_chosen": 1.2296550273895264, "log_odds_ratio": -0.29622870683670044, "logits/chosen": 0.9656735062599182, "logits/rejected": 0.9530856609344482, "logps/chosen": -1.4884740114212036, "logps/rejected": -2.4934537410736084, "loss": 0.4678, "nll_loss": 0.4381728768348694, "rewards/accuracies": 1.0, "rewards/chosen": -0.14884740114212036, "rewards/margins": 0.10049796104431152, "rewards/rejected": -0.24934536218643188, "step": 5550 }, { "epoch": 15.197809719370294, "grad_norm": 4.550907611846924, "learning_rate": 2.395890410958904e-07, "log_odds_chosen": 2.7907121181488037, "log_odds_ratio": -0.25691336393356323, "logits/chosen": 1.1134376525878906, "logits/rejected": 1.0976142883300781, "logps/chosen": -1.5317490100860596, "logps/rejected": -4.168170928955078, "loss": 0.4715, "nll_loss": 0.44583314657211304, "rewards/accuracies": 0.875, "rewards/chosen": -0.15317490696907043, "rewards/margins": 0.26364219188690186, "rewards/rejected": -0.4168170988559723, "step": 5551 }, { "epoch": 15.200547570157426, "grad_norm": 4.080965995788574, "learning_rate": 2.3945205479452053e-07, "log_odds_chosen": 3.4517135620117188, "log_odds_ratio": -0.17577342689037323, "logits/chosen": 0.9792729020118713, "logits/rejected": 0.8818548917770386, "logps/chosen": -1.6207234859466553, "logps/rejected": -4.871656894683838, "loss": 0.5608, "nll_loss": 0.5432497262954712, "rewards/accuracies": 1.0, "rewards/chosen": -0.16207236051559448, "rewards/margins": 0.3250933289527893, "rewards/rejected": -0.4871656894683838, "step": 5552 }, { "epoch": 15.203285420944558, "grad_norm": 3.230278253555298, "learning_rate": 2.393150684931507e-07, "log_odds_chosen": 2.742680549621582, "log_odds_ratio": -0.19060620665550232, "logits/chosen": 1.0206695795059204, "logits/rejected": 0.9403039216995239, "logps/chosen": -1.5443962812423706, "logps/rejected": -4.079447269439697, "loss": 0.5069, "nll_loss": 0.4878185987472534, "rewards/accuracies": 1.0, "rewards/chosen": -0.15443962812423706, "rewards/margins": 0.25350508093833923, "rewards/rejected": -0.4079447090625763, "step": 5553 }, { "epoch": 15.206023271731691, "grad_norm": 3.931033134460449, "learning_rate": 2.3917808219178084e-07, "log_odds_chosen": 2.415024518966675, "log_odds_ratio": -0.20601512491703033, "logits/chosen": 1.227577805519104, "logits/rejected": 1.2144355773925781, "logps/chosen": -1.376949667930603, "logps/rejected": -3.427285671234131, "loss": 0.5525, "nll_loss": 0.5319364070892334, "rewards/accuracies": 1.0, "rewards/chosen": -0.13769496977329254, "rewards/margins": 0.20503360033035278, "rewards/rejected": -0.34272855520248413, "step": 5554 }, { "epoch": 15.208761122518823, "grad_norm": 3.4492876529693604, "learning_rate": 2.3904109589041094e-07, "log_odds_chosen": 4.932845115661621, "log_odds_ratio": -0.1723290979862213, "logits/chosen": 1.1989983320236206, "logits/rejected": 1.234354853630066, "logps/chosen": -2.512303113937378, "logps/rejected": -7.365527153015137, "loss": 0.5637, "nll_loss": 0.5464951992034912, "rewards/accuracies": 1.0, "rewards/chosen": -0.2512303292751312, "rewards/margins": 0.4853224754333496, "rewards/rejected": -0.7365527749061584, "step": 5555 }, { "epoch": 15.211498973305956, "grad_norm": 3.583949327468872, "learning_rate": 2.389041095890411e-07, "log_odds_chosen": 4.19680118560791, "log_odds_ratio": -0.062409818172454834, "logits/chosen": 1.2051403522491455, "logits/rejected": 1.1440255641937256, "logps/chosen": -2.0564932823181152, "logps/rejected": -6.084762096405029, "loss": 0.5841, "nll_loss": 0.5778723955154419, "rewards/accuracies": 1.0, "rewards/chosen": -0.20564933121204376, "rewards/margins": 0.4028269052505493, "rewards/rejected": -0.6084762215614319, "step": 5556 }, { "epoch": 15.214236824093087, "grad_norm": 3.2885563373565674, "learning_rate": 2.3876712328767124e-07, "log_odds_chosen": 3.4878461360931396, "log_odds_ratio": -0.09433239698410034, "logits/chosen": 1.4815843105316162, "logits/rejected": 1.459153175354004, "logps/chosen": -1.6130950450897217, "logps/rejected": -4.869463920593262, "loss": 0.4812, "nll_loss": 0.4718034565448761, "rewards/accuracies": 1.0, "rewards/chosen": -0.16130951046943665, "rewards/margins": 0.3256368637084961, "rewards/rejected": -0.4869464039802551, "step": 5557 }, { "epoch": 15.21697467488022, "grad_norm": 7.154568672180176, "learning_rate": 2.3863013698630134e-07, "log_odds_chosen": 1.846917748451233, "log_odds_ratio": -0.3898465037345886, "logits/chosen": 1.3116066455841064, "logits/rejected": 1.2706222534179688, "logps/chosen": -2.0067501068115234, "logps/rejected": -3.675628185272217, "loss": 0.5984, "nll_loss": 0.5593934655189514, "rewards/accuracies": 0.875, "rewards/chosen": -0.20067501068115234, "rewards/margins": 0.1668878197669983, "rewards/rejected": -0.36756280064582825, "step": 5558 }, { "epoch": 15.219712525667351, "grad_norm": 4.037428379058838, "learning_rate": 2.384931506849315e-07, "log_odds_chosen": 2.018707275390625, "log_odds_ratio": -0.37846750020980835, "logits/chosen": 1.0103440284729004, "logits/rejected": 0.9075967669487, "logps/chosen": -1.833850383758545, "logps/rejected": -3.7344822883605957, "loss": 0.5083, "nll_loss": 0.4704959988594055, "rewards/accuracies": 0.875, "rewards/chosen": -0.18338504433631897, "rewards/margins": 0.19006319344043732, "rewards/rejected": -0.3734482228755951, "step": 5559 }, { "epoch": 15.222450376454484, "grad_norm": 5.641062259674072, "learning_rate": 2.3835616438356162e-07, "log_odds_chosen": 2.516185998916626, "log_odds_ratio": -0.2053956240415573, "logits/chosen": 0.8976287841796875, "logits/rejected": 0.89556884765625, "logps/chosen": -1.7603752613067627, "logps/rejected": -4.063810348510742, "loss": 0.6469, "nll_loss": 0.6263329982757568, "rewards/accuracies": 1.0, "rewards/chosen": -0.17603753507137299, "rewards/margins": 0.2303435057401657, "rewards/rejected": -0.4063810706138611, "step": 5560 }, { "epoch": 15.225188227241615, "grad_norm": 12.330522537231445, "learning_rate": 2.3821917808219177e-07, "log_odds_chosen": 3.0787861347198486, "log_odds_ratio": -0.4267489016056061, "logits/chosen": 1.2366629838943481, "logits/rejected": 1.2408708333969116, "logps/chosen": -2.2657089233398438, "logps/rejected": -5.180017948150635, "loss": 0.5755, "nll_loss": 0.5327796936035156, "rewards/accuracies": 0.75, "rewards/chosen": -0.22657090425491333, "rewards/margins": 0.2914309501647949, "rewards/rejected": -0.5180018544197083, "step": 5561 }, { "epoch": 15.227926078028748, "grad_norm": 5.347001075744629, "learning_rate": 2.380821917808219e-07, "log_odds_chosen": 2.0540149211883545, "log_odds_ratio": -0.383090615272522, "logits/chosen": 1.1063119173049927, "logits/rejected": 1.0942392349243164, "logps/chosen": -1.7358629703521729, "logps/rejected": -3.667057752609253, "loss": 0.4352, "nll_loss": 0.39690226316452026, "rewards/accuracies": 0.75, "rewards/chosen": -0.17358629405498505, "rewards/margins": 0.19311949610710144, "rewards/rejected": -0.3667057752609253, "step": 5562 }, { "epoch": 15.23066392881588, "grad_norm": 7.6296539306640625, "learning_rate": 2.3794520547945205e-07, "log_odds_chosen": 2.53935170173645, "log_odds_ratio": -0.3861743211746216, "logits/chosen": 0.9717677235603333, "logits/rejected": 0.8957386612892151, "logps/chosen": -2.6278557777404785, "logps/rejected": -5.018599987030029, "loss": 0.6312, "nll_loss": 0.5925760269165039, "rewards/accuracies": 0.875, "rewards/chosen": -0.26278555393218994, "rewards/margins": 0.2390744388103485, "rewards/rejected": -0.5018599629402161, "step": 5563 }, { "epoch": 15.233401779603012, "grad_norm": 4.504025459289551, "learning_rate": 2.378082191780822e-07, "log_odds_chosen": 3.5796446800231934, "log_odds_ratio": -0.1780034452676773, "logits/chosen": 1.1766408681869507, "logits/rejected": 1.1875499486923218, "logps/chosen": -1.48646080493927, "logps/rejected": -4.74653959274292, "loss": 0.4814, "nll_loss": 0.46359488368034363, "rewards/accuracies": 1.0, "rewards/chosen": -0.14864608645439148, "rewards/margins": 0.3260079026222229, "rewards/rejected": -0.474653959274292, "step": 5564 }, { "epoch": 15.236139630390143, "grad_norm": 3.749767303466797, "learning_rate": 2.3767123287671233e-07, "log_odds_chosen": 3.0387096405029297, "log_odds_ratio": -0.10517764091491699, "logits/chosen": 0.8246980905532837, "logits/rejected": 0.7976725101470947, "logps/chosen": -1.8415820598602295, "logps/rejected": -4.666051864624023, "loss": 0.5726, "nll_loss": 0.5620782375335693, "rewards/accuracies": 1.0, "rewards/chosen": -0.18415820598602295, "rewards/margins": 0.2824469804763794, "rewards/rejected": -0.46660518646240234, "step": 5565 }, { "epoch": 15.238877481177276, "grad_norm": 3.4402787685394287, "learning_rate": 2.3753424657534245e-07, "log_odds_chosen": 2.05690860748291, "log_odds_ratio": -0.28585970401763916, "logits/chosen": 1.2039375305175781, "logits/rejected": 1.243788480758667, "logps/chosen": -2.2441678047180176, "logps/rejected": -4.225794792175293, "loss": 0.5177, "nll_loss": 0.48909395933151245, "rewards/accuracies": 0.75, "rewards/chosen": -0.2244167923927307, "rewards/margins": 0.19816267490386963, "rewards/rejected": -0.42257946729660034, "step": 5566 }, { "epoch": 15.241615331964407, "grad_norm": 4.90392541885376, "learning_rate": 2.3739726027397258e-07, "log_odds_chosen": 1.8968636989593506, "log_odds_ratio": -0.39802786707878113, "logits/chosen": 1.260387659072876, "logits/rejected": 1.3375688791275024, "logps/chosen": -2.4405558109283447, "logps/rejected": -4.228072166442871, "loss": 0.5969, "nll_loss": 0.5571413040161133, "rewards/accuracies": 0.875, "rewards/chosen": -0.2440555989742279, "rewards/margins": 0.1787516176700592, "rewards/rejected": -0.4228072166442871, "step": 5567 }, { "epoch": 15.24435318275154, "grad_norm": 3.623157262802124, "learning_rate": 2.3726027397260273e-07, "log_odds_chosen": 4.086734294891357, "log_odds_ratio": -0.12157872319221497, "logits/chosen": 1.0286293029785156, "logits/rejected": 1.0470327138900757, "logps/chosen": -2.801145076751709, "logps/rejected": -6.75277042388916, "loss": 0.6078, "nll_loss": 0.5956474542617798, "rewards/accuracies": 1.0, "rewards/chosen": -0.2801145315170288, "rewards/margins": 0.39516252279281616, "rewards/rejected": -0.6752769947052002, "step": 5568 }, { "epoch": 15.247091033538672, "grad_norm": 3.8955233097076416, "learning_rate": 2.3712328767123285e-07, "log_odds_chosen": 1.936316728591919, "log_odds_ratio": -0.2429444044828415, "logits/chosen": 1.2624988555908203, "logits/rejected": 1.2938131093978882, "logps/chosen": -2.430147886276245, "logps/rejected": -4.2223663330078125, "loss": 0.6092, "nll_loss": 0.5849233269691467, "rewards/accuracies": 0.875, "rewards/chosen": -0.24301478266716003, "rewards/margins": 0.17922189831733704, "rewards/rejected": -0.42223668098449707, "step": 5569 }, { "epoch": 15.249828884325805, "grad_norm": 5.202371597290039, "learning_rate": 2.36986301369863e-07, "log_odds_chosen": 3.540452480316162, "log_odds_ratio": -0.13074073195457458, "logits/chosen": 1.1432149410247803, "logits/rejected": 1.201116681098938, "logps/chosen": -2.474057197570801, "logps/rejected": -5.901098251342773, "loss": 0.7868, "nll_loss": 0.7737069129943848, "rewards/accuracies": 1.0, "rewards/chosen": -0.24740572273731232, "rewards/margins": 0.3427041172981262, "rewards/rejected": -0.5901098251342773, "step": 5570 }, { "epoch": 15.252566735112936, "grad_norm": 3.24131441116333, "learning_rate": 2.3684931506849316e-07, "log_odds_chosen": 3.356052875518799, "log_odds_ratio": -0.1648813784122467, "logits/chosen": 1.0520437955856323, "logits/rejected": 1.0443984270095825, "logps/chosen": -2.0407533645629883, "logps/rejected": -5.260182857513428, "loss": 0.5102, "nll_loss": 0.4937535524368286, "rewards/accuracies": 1.0, "rewards/chosen": -0.20407536625862122, "rewards/margins": 0.3219429552555084, "rewards/rejected": -0.5260183215141296, "step": 5571 }, { "epoch": 15.255304585900069, "grad_norm": 4.709632873535156, "learning_rate": 2.3671232876712326e-07, "log_odds_chosen": 2.488369941711426, "log_odds_ratio": -0.27057135105133057, "logits/chosen": 1.2541759014129639, "logits/rejected": 1.2241406440734863, "logps/chosen": -1.799297571182251, "logps/rejected": -4.061606407165527, "loss": 0.4999, "nll_loss": 0.47279828786849976, "rewards/accuracies": 0.75, "rewards/chosen": -0.17992976307868958, "rewards/margins": 0.2262309491634369, "rewards/rejected": -0.40616071224212646, "step": 5572 }, { "epoch": 15.2580424366872, "grad_norm": 3.7212977409362793, "learning_rate": 2.365753424657534e-07, "log_odds_chosen": 3.047689437866211, "log_odds_ratio": -0.12651889026165009, "logits/chosen": 1.3650078773498535, "logits/rejected": 1.403900384902954, "logps/chosen": -2.0770692825317383, "logps/rejected": -4.9715118408203125, "loss": 0.5859, "nll_loss": 0.5732751488685608, "rewards/accuracies": 1.0, "rewards/chosen": -0.20770692825317383, "rewards/margins": 0.289444237947464, "rewards/rejected": -0.49715113639831543, "step": 5573 }, { "epoch": 15.260780287474333, "grad_norm": 3.503708600997925, "learning_rate": 2.3643835616438354e-07, "log_odds_chosen": 3.310988664627075, "log_odds_ratio": -0.15238980948925018, "logits/chosen": 1.0700836181640625, "logits/rejected": 1.0316193103790283, "logps/chosen": -1.9535216093063354, "logps/rejected": -5.0552544593811035, "loss": 0.4791, "nll_loss": 0.4638659656047821, "rewards/accuracies": 1.0, "rewards/chosen": -0.19535216689109802, "rewards/margins": 0.31017330288887024, "rewards/rejected": -0.5055254697799683, "step": 5574 }, { "epoch": 15.263518138261464, "grad_norm": 9.439126968383789, "learning_rate": 2.363013698630137e-07, "log_odds_chosen": 1.2372045516967773, "log_odds_ratio": -0.2992638945579529, "logits/chosen": 0.7521743178367615, "logits/rejected": 0.6940429210662842, "logps/chosen": -1.6940836906433105, "logps/rejected": -2.7729225158691406, "loss": 0.544, "nll_loss": 0.5140969753265381, "rewards/accuracies": 1.0, "rewards/chosen": -0.16940836608409882, "rewards/margins": 0.10788387060165405, "rewards/rejected": -0.27729225158691406, "step": 5575 }, { "epoch": 15.266255989048597, "grad_norm": 4.8855133056640625, "learning_rate": 2.3616438356164384e-07, "log_odds_chosen": 1.6371656656265259, "log_odds_ratio": -0.35012760758399963, "logits/chosen": 1.129830002784729, "logits/rejected": 0.9149907827377319, "logps/chosen": -1.1654713153839111, "logps/rejected": -2.558418035507202, "loss": 0.5263, "nll_loss": 0.4912463426589966, "rewards/accuracies": 1.0, "rewards/chosen": -0.116547130048275, "rewards/margins": 0.13929465413093567, "rewards/rejected": -0.25584179162979126, "step": 5576 }, { "epoch": 15.268993839835728, "grad_norm": 3.6844420433044434, "learning_rate": 2.3602739726027397e-07, "log_odds_chosen": 1.3461122512817383, "log_odds_ratio": -0.2568918764591217, "logits/chosen": 0.9372864365577698, "logits/rejected": 0.8871299028396606, "logps/chosen": -1.680377721786499, "logps/rejected": -2.845907211303711, "loss": 0.4992, "nll_loss": 0.47346699237823486, "rewards/accuracies": 1.0, "rewards/chosen": -0.1680377870798111, "rewards/margins": 0.11655294895172119, "rewards/rejected": -0.2845907211303711, "step": 5577 }, { "epoch": 15.271731690622861, "grad_norm": 4.478255271911621, "learning_rate": 2.3589041095890412e-07, "log_odds_chosen": 1.1717157363891602, "log_odds_ratio": -0.3699105381965637, "logits/chosen": 0.9499852061271667, "logits/rejected": 0.8903579115867615, "logps/chosen": -1.985026240348816, "logps/rejected": -3.016181468963623, "loss": 0.4672, "nll_loss": 0.4302513003349304, "rewards/accuracies": 0.875, "rewards/chosen": -0.19850262999534607, "rewards/margins": 0.103115513920784, "rewards/rejected": -0.30161815881729126, "step": 5578 }, { "epoch": 15.274469541409992, "grad_norm": 3.6709353923797607, "learning_rate": 2.3575342465753422e-07, "log_odds_chosen": 2.6267964839935303, "log_odds_ratio": -0.12147816270589828, "logits/chosen": 1.3530652523040771, "logits/rejected": 1.3780193328857422, "logps/chosen": -1.5982656478881836, "logps/rejected": -3.976806879043579, "loss": 0.4921, "nll_loss": 0.4799341857433319, "rewards/accuracies": 1.0, "rewards/chosen": -0.15982654690742493, "rewards/margins": 0.23785412311553955, "rewards/rejected": -0.39768069982528687, "step": 5579 }, { "epoch": 15.277207392197125, "grad_norm": 4.355528831481934, "learning_rate": 2.3561643835616437e-07, "log_odds_chosen": 3.532379150390625, "log_odds_ratio": -0.13425971567630768, "logits/chosen": 1.1911873817443848, "logits/rejected": 1.275285005569458, "logps/chosen": -1.934765338897705, "logps/rejected": -5.243095397949219, "loss": 0.6226, "nll_loss": 0.6091791987419128, "rewards/accuracies": 1.0, "rewards/chosen": -0.19347652792930603, "rewards/margins": 0.3308330178260803, "rewards/rejected": -0.524309515953064, "step": 5580 }, { "epoch": 15.279945242984258, "grad_norm": 3.233565330505371, "learning_rate": 2.354794520547945e-07, "log_odds_chosen": 5.434869766235352, "log_odds_ratio": -0.13923704624176025, "logits/chosen": 0.9517953395843506, "logits/rejected": 0.9213999509811401, "logps/chosen": -1.7170804738998413, "logps/rejected": -6.968670845031738, "loss": 0.5345, "nll_loss": 0.5205901861190796, "rewards/accuracies": 1.0, "rewards/chosen": -0.17170804738998413, "rewards/margins": 0.5251590013504028, "rewards/rejected": -0.6968669891357422, "step": 5581 }, { "epoch": 15.28268309377139, "grad_norm": 3.9805662631988525, "learning_rate": 2.3534246575342465e-07, "log_odds_chosen": 1.3582146167755127, "log_odds_ratio": -0.3620896339416504, "logits/chosen": 1.1375705003738403, "logits/rejected": 1.0678722858428955, "logps/chosen": -1.4164512157440186, "logps/rejected": -2.6287031173706055, "loss": 0.4524, "nll_loss": 0.41616982221603394, "rewards/accuracies": 1.0, "rewards/chosen": -0.14164511859416962, "rewards/margins": 0.12122519314289093, "rewards/rejected": -0.26287031173706055, "step": 5582 }, { "epoch": 15.285420944558522, "grad_norm": 3.531059980392456, "learning_rate": 2.352054794520548e-07, "log_odds_chosen": 2.8077945709228516, "log_odds_ratio": -0.1994614452123642, "logits/chosen": 1.079922080039978, "logits/rejected": 1.0962884426116943, "logps/chosen": -1.7484314441680908, "logps/rejected": -4.366331577301025, "loss": 0.5054, "nll_loss": 0.48545902967453003, "rewards/accuracies": 0.875, "rewards/chosen": -0.17484313249588013, "rewards/margins": 0.2617899775505066, "rewards/rejected": -0.4366331398487091, "step": 5583 }, { "epoch": 15.288158795345653, "grad_norm": 4.042361259460449, "learning_rate": 2.3506849315068492e-07, "log_odds_chosen": 3.475714683532715, "log_odds_ratio": -0.27127015590667725, "logits/chosen": 1.1114732027053833, "logits/rejected": 1.12525475025177, "logps/chosen": -1.6064963340759277, "logps/rejected": -4.88388729095459, "loss": 0.6016, "nll_loss": 0.5744262337684631, "rewards/accuracies": 0.875, "rewards/chosen": -0.1606496274471283, "rewards/margins": 0.32773908972740173, "rewards/rejected": -0.48838871717453003, "step": 5584 }, { "epoch": 15.290896646132786, "grad_norm": 4.114119052886963, "learning_rate": 2.3493150684931508e-07, "log_odds_chosen": 2.9339401721954346, "log_odds_ratio": -0.21618101000785828, "logits/chosen": 1.3776965141296387, "logits/rejected": 1.4050184488296509, "logps/chosen": -2.3592777252197266, "logps/rejected": -5.13715934753418, "loss": 0.5778, "nll_loss": 0.5561692118644714, "rewards/accuracies": 1.0, "rewards/chosen": -0.2359277904033661, "rewards/margins": 0.2777881920337677, "rewards/rejected": -0.5137159824371338, "step": 5585 }, { "epoch": 15.293634496919918, "grad_norm": 12.433584213256836, "learning_rate": 2.3479452054794518e-07, "log_odds_chosen": 2.4729366302490234, "log_odds_ratio": -0.334825336933136, "logits/chosen": 1.2557872533798218, "logits/rejected": 1.2900152206420898, "logps/chosen": -2.898357629776001, "logps/rejected": -5.204359531402588, "loss": 0.5382, "nll_loss": 0.5047603845596313, "rewards/accuracies": 0.75, "rewards/chosen": -0.28983578085899353, "rewards/margins": 0.23060017824172974, "rewards/rejected": -0.5204359889030457, "step": 5586 }, { "epoch": 15.29637234770705, "grad_norm": 4.16682767868042, "learning_rate": 2.3465753424657533e-07, "log_odds_chosen": 2.1631531715393066, "log_odds_ratio": -0.3009571433067322, "logits/chosen": 1.144012451171875, "logits/rejected": 1.1115081310272217, "logps/chosen": -1.92558753490448, "logps/rejected": -3.971275806427002, "loss": 0.4864, "nll_loss": 0.45630019903182983, "rewards/accuracies": 0.875, "rewards/chosen": -0.19255876541137695, "rewards/margins": 0.20456881821155548, "rewards/rejected": -0.39712759852409363, "step": 5587 }, { "epoch": 15.299110198494182, "grad_norm": 5.764132499694824, "learning_rate": 2.3452054794520548e-07, "log_odds_chosen": 1.7931548357009888, "log_odds_ratio": -0.4255695939064026, "logits/chosen": 1.1084041595458984, "logits/rejected": 1.0196377038955688, "logps/chosen": -2.3076720237731934, "logps/rejected": -3.9653239250183105, "loss": 0.5816, "nll_loss": 0.5390677452087402, "rewards/accuracies": 0.875, "rewards/chosen": -0.2307671755552292, "rewards/margins": 0.16576522588729858, "rewards/rejected": -0.3965323865413666, "step": 5588 }, { "epoch": 15.301848049281315, "grad_norm": 6.0431599617004395, "learning_rate": 2.343835616438356e-07, "log_odds_chosen": 2.7202625274658203, "log_odds_ratio": -0.31536975502967834, "logits/chosen": 0.8550989627838135, "logits/rejected": 0.8632991313934326, "logps/chosen": -3.223142147064209, "logps/rejected": -5.806031227111816, "loss": 0.6942, "nll_loss": 0.6626368761062622, "rewards/accuracies": 0.75, "rewards/chosen": -0.32231420278549194, "rewards/margins": 0.2582889199256897, "rewards/rejected": -0.5806031227111816, "step": 5589 }, { "epoch": 15.304585900068446, "grad_norm": 3.775923490524292, "learning_rate": 2.3424657534246576e-07, "log_odds_chosen": 2.378478527069092, "log_odds_ratio": -0.21797867119312286, "logits/chosen": 1.0966782569885254, "logits/rejected": 1.1042113304138184, "logps/chosen": -1.9181932210922241, "logps/rejected": -4.129118919372559, "loss": 0.5552, "nll_loss": 0.5334353446960449, "rewards/accuracies": 0.875, "rewards/chosen": -0.19181932508945465, "rewards/margins": 0.2210925817489624, "rewards/rejected": -0.41291192173957825, "step": 5590 }, { "epoch": 15.307323750855579, "grad_norm": 5.390332221984863, "learning_rate": 2.3410958904109588e-07, "log_odds_chosen": 2.146052598953247, "log_odds_ratio": -0.35311996936798096, "logits/chosen": 0.9035324454307556, "logits/rejected": 0.9192020893096924, "logps/chosen": -2.166151762008667, "logps/rejected": -4.217968463897705, "loss": 0.5977, "nll_loss": 0.5623440146446228, "rewards/accuracies": 0.875, "rewards/chosen": -0.21661518514156342, "rewards/margins": 0.20518165826797485, "rewards/rejected": -0.42179685831069946, "step": 5591 }, { "epoch": 15.31006160164271, "grad_norm": 5.555275917053223, "learning_rate": 2.3397260273972603e-07, "log_odds_chosen": 2.0385184288024902, "log_odds_ratio": -0.2841942310333252, "logits/chosen": 1.1658339500427246, "logits/rejected": 1.1348775625228882, "logps/chosen": -1.7398462295532227, "logps/rejected": -3.531355381011963, "loss": 0.5133, "nll_loss": 0.48488685488700867, "rewards/accuracies": 1.0, "rewards/chosen": -0.1739846169948578, "rewards/margins": 0.17915092408657074, "rewards/rejected": -0.3531355857849121, "step": 5592 }, { "epoch": 15.312799452429843, "grad_norm": 6.756191730499268, "learning_rate": 2.3383561643835613e-07, "log_odds_chosen": 2.127835750579834, "log_odds_ratio": -0.30073463916778564, "logits/chosen": 0.9686495065689087, "logits/rejected": 0.9603645205497742, "logps/chosen": -2.3473923206329346, "logps/rejected": -4.281928062438965, "loss": 0.5432, "nll_loss": 0.5130856037139893, "rewards/accuracies": 0.875, "rewards/chosen": -0.2347392439842224, "rewards/margins": 0.19345355033874512, "rewards/rejected": -0.42819279432296753, "step": 5593 }, { "epoch": 15.315537303216974, "grad_norm": 3.148629665374756, "learning_rate": 2.3369863013698629e-07, "log_odds_chosen": 4.417675018310547, "log_odds_ratio": -0.08967620134353638, "logits/chosen": 1.184329628944397, "logits/rejected": 1.1982501745224, "logps/chosen": -1.7952797412872314, "logps/rejected": -5.991954803466797, "loss": 0.5437, "nll_loss": 0.53473299741745, "rewards/accuracies": 1.0, "rewards/chosen": -0.17952796816825867, "rewards/margins": 0.41966745257377625, "rewards/rejected": -0.5991954803466797, "step": 5594 }, { "epoch": 15.318275154004107, "grad_norm": 3.492234706878662, "learning_rate": 2.3356164383561644e-07, "log_odds_chosen": 2.6778759956359863, "log_odds_ratio": -0.26495543122291565, "logits/chosen": 0.8034144639968872, "logits/rejected": 0.7543951869010925, "logps/chosen": -1.9745981693267822, "logps/rejected": -4.474938869476318, "loss": 0.5781, "nll_loss": 0.5516142845153809, "rewards/accuracies": 0.875, "rewards/chosen": -0.19745981693267822, "rewards/margins": 0.25003406405448914, "rewards/rejected": -0.44749388098716736, "step": 5595 }, { "epoch": 15.321013004791238, "grad_norm": 4.43383264541626, "learning_rate": 2.3342465753424656e-07, "log_odds_chosen": 2.9738590717315674, "log_odds_ratio": -0.2279004007577896, "logits/chosen": 1.2476255893707275, "logits/rejected": 1.223666787147522, "logps/chosen": -1.9796658754348755, "logps/rejected": -4.806309223175049, "loss": 0.6001, "nll_loss": 0.5773444175720215, "rewards/accuracies": 0.875, "rewards/chosen": -0.1979665905237198, "rewards/margins": 0.28266429901123047, "rewards/rejected": -0.48063093423843384, "step": 5596 }, { "epoch": 15.323750855578371, "grad_norm": 3.872857093811035, "learning_rate": 2.3328767123287672e-07, "log_odds_chosen": 1.6895002126693726, "log_odds_ratio": -0.2579803466796875, "logits/chosen": 1.1891744136810303, "logits/rejected": 1.1797454357147217, "logps/chosen": -1.0934665203094482, "logps/rejected": -2.5070135593414307, "loss": 0.3734, "nll_loss": 0.34760645031929016, "rewards/accuracies": 1.0, "rewards/chosen": -0.10934664309024811, "rewards/margins": 0.14135470986366272, "rewards/rejected": -0.250701367855072, "step": 5597 }, { "epoch": 15.326488706365502, "grad_norm": 3.642882823944092, "learning_rate": 2.3315068493150684e-07, "log_odds_chosen": 2.936206340789795, "log_odds_ratio": -0.22154588997364044, "logits/chosen": 0.9284767508506775, "logits/rejected": 0.8441096544265747, "logps/chosen": -1.9859696626663208, "logps/rejected": -4.752316951751709, "loss": 0.5182, "nll_loss": 0.49605196714401245, "rewards/accuracies": 0.875, "rewards/chosen": -0.19859696924686432, "rewards/margins": 0.27663475275039673, "rewards/rejected": -0.47523170709609985, "step": 5598 }, { "epoch": 15.329226557152635, "grad_norm": 4.005402565002441, "learning_rate": 2.3301369863013697e-07, "log_odds_chosen": 1.610735535621643, "log_odds_ratio": -0.257484495639801, "logits/chosen": 1.1975339651107788, "logits/rejected": 1.211500644683838, "logps/chosen": -1.454114317893982, "logps/rejected": -2.8664369583129883, "loss": 0.5379, "nll_loss": 0.5121111869812012, "rewards/accuracies": 0.875, "rewards/chosen": -0.1454114317893982, "rewards/margins": 0.14123226702213287, "rewards/rejected": -0.28664371371269226, "step": 5599 }, { "epoch": 15.331964407939767, "grad_norm": 3.664369583129883, "learning_rate": 2.328767123287671e-07, "log_odds_chosen": 4.458224296569824, "log_odds_ratio": -0.06784264743328094, "logits/chosen": 1.009350061416626, "logits/rejected": 1.100050687789917, "logps/chosen": -2.1228249073028564, "logps/rejected": -6.437343597412109, "loss": 0.649, "nll_loss": 0.6422328352928162, "rewards/accuracies": 1.0, "rewards/chosen": -0.2122824788093567, "rewards/margins": 0.4314519166946411, "rewards/rejected": -0.6437343955039978, "step": 5600 }, { "epoch": 15.3347022587269, "grad_norm": 3.200160503387451, "learning_rate": 2.3273972602739724e-07, "log_odds_chosen": 2.7821860313415527, "log_odds_ratio": -0.10182783007621765, "logits/chosen": 1.5639921426773071, "logits/rejected": 1.5348378419876099, "logps/chosen": -1.6465213298797607, "logps/rejected": -4.173197269439697, "loss": 0.4459, "nll_loss": 0.4357343018054962, "rewards/accuracies": 1.0, "rewards/chosen": -0.16465213894844055, "rewards/margins": 0.2526676058769226, "rewards/rejected": -0.41731971502304077, "step": 5601 }, { "epoch": 15.33744010951403, "grad_norm": 3.5972466468811035, "learning_rate": 2.326027397260274e-07, "log_odds_chosen": 3.509526491165161, "log_odds_ratio": -0.09070907533168793, "logits/chosen": 0.8031806945800781, "logits/rejected": 0.747469425201416, "logps/chosen": -1.8482770919799805, "logps/rejected": -5.1388068199157715, "loss": 0.6361, "nll_loss": 0.6270749568939209, "rewards/accuracies": 1.0, "rewards/chosen": -0.18482771515846252, "rewards/margins": 0.3290529251098633, "rewards/rejected": -0.5138806700706482, "step": 5602 }, { "epoch": 15.340177960301164, "grad_norm": 6.624818801879883, "learning_rate": 2.3246575342465752e-07, "log_odds_chosen": 3.498283863067627, "log_odds_ratio": -0.4132622480392456, "logits/chosen": 1.2733019590377808, "logits/rejected": 1.2546074390411377, "logps/chosen": -2.3443193435668945, "logps/rejected": -5.76528787612915, "loss": 0.6445, "nll_loss": 0.6031625270843506, "rewards/accuracies": 0.875, "rewards/chosen": -0.2344319224357605, "rewards/margins": 0.34209686517715454, "rewards/rejected": -0.576528787612915, "step": 5603 }, { "epoch": 15.342915811088295, "grad_norm": 4.649794578552246, "learning_rate": 2.3232876712328767e-07, "log_odds_chosen": 2.379737377166748, "log_odds_ratio": -0.3370709717273712, "logits/chosen": 0.8875208497047424, "logits/rejected": 0.7738745212554932, "logps/chosen": -2.190171480178833, "logps/rejected": -4.427037715911865, "loss": 0.6669, "nll_loss": 0.6331865787506104, "rewards/accuracies": 0.875, "rewards/chosen": -0.2190171480178833, "rewards/margins": 0.2236866056919098, "rewards/rejected": -0.4427037239074707, "step": 5604 }, { "epoch": 15.345653661875428, "grad_norm": 3.7338593006134033, "learning_rate": 2.321917808219178e-07, "log_odds_chosen": 2.9656529426574707, "log_odds_ratio": -0.17093360424041748, "logits/chosen": 1.0416369438171387, "logits/rejected": 1.0043740272521973, "logps/chosen": -1.5588699579238892, "logps/rejected": -4.286908149719238, "loss": 0.4497, "nll_loss": 0.43265625834465027, "rewards/accuracies": 1.0, "rewards/chosen": -0.15588700771331787, "rewards/margins": 0.2728038430213928, "rewards/rejected": -0.4286908507347107, "step": 5605 }, { "epoch": 15.34839151266256, "grad_norm": 8.09969425201416, "learning_rate": 2.3205479452054793e-07, "log_odds_chosen": 2.6629931926727295, "log_odds_ratio": -0.17305409908294678, "logits/chosen": 1.0169237852096558, "logits/rejected": 0.9518743753433228, "logps/chosen": -2.390765428543091, "logps/rejected": -4.88198184967041, "loss": 0.6355, "nll_loss": 0.6182072162628174, "rewards/accuracies": 1.0, "rewards/chosen": -0.23907655477523804, "rewards/margins": 0.24912163615226746, "rewards/rejected": -0.4881981909275055, "step": 5606 }, { "epoch": 15.351129363449692, "grad_norm": 3.3314664363861084, "learning_rate": 2.3191780821917808e-07, "log_odds_chosen": 3.309626579284668, "log_odds_ratio": -0.16822807490825653, "logits/chosen": 1.060922622680664, "logits/rejected": 1.0683515071868896, "logps/chosen": -1.7650408744812012, "logps/rejected": -4.802698135375977, "loss": 0.463, "nll_loss": 0.44617822766304016, "rewards/accuracies": 1.0, "rewards/chosen": -0.17650410532951355, "rewards/margins": 0.3037657141685486, "rewards/rejected": -0.48026978969573975, "step": 5607 }, { "epoch": 15.353867214236825, "grad_norm": 3.554311513900757, "learning_rate": 2.317808219178082e-07, "log_odds_chosen": 4.956330299377441, "log_odds_ratio": -0.15638995170593262, "logits/chosen": 1.1955841779708862, "logits/rejected": 1.2078652381896973, "logps/chosen": -2.2212982177734375, "logps/rejected": -7.045601844787598, "loss": 0.5763, "nll_loss": 0.5606845617294312, "rewards/accuracies": 0.875, "rewards/chosen": -0.22212983667850494, "rewards/margins": 0.4824303686618805, "rewards/rejected": -0.7045601606369019, "step": 5608 }, { "epoch": 15.356605065023956, "grad_norm": 4.338512897491455, "learning_rate": 2.3164383561643836e-07, "log_odds_chosen": 2.2140538692474365, "log_odds_ratio": -0.27901583909988403, "logits/chosen": 1.065976858139038, "logits/rejected": 1.132761836051941, "logps/chosen": -2.2693471908569336, "logps/rejected": -4.377416133880615, "loss": 0.6047, "nll_loss": 0.5768358707427979, "rewards/accuracies": 1.0, "rewards/chosen": -0.22693473100662231, "rewards/margins": 0.21080687642097473, "rewards/rejected": -0.43774160742759705, "step": 5609 }, { "epoch": 15.359342915811089, "grad_norm": 3.238590955734253, "learning_rate": 2.3150684931506848e-07, "log_odds_chosen": 1.352422833442688, "log_odds_ratio": -0.2958352565765381, "logits/chosen": 1.002018690109253, "logits/rejected": 0.9192960262298584, "logps/chosen": -1.7951939105987549, "logps/rejected": -2.996272563934326, "loss": 0.4928, "nll_loss": 0.46322911977767944, "rewards/accuracies": 1.0, "rewards/chosen": -0.1795194149017334, "rewards/margins": 0.12010782957077026, "rewards/rejected": -0.29962724447250366, "step": 5610 }, { "epoch": 15.36208076659822, "grad_norm": 4.898963451385498, "learning_rate": 2.3136986301369863e-07, "log_odds_chosen": 2.088191032409668, "log_odds_ratio": -0.20859023928642273, "logits/chosen": 0.8678580522537231, "logits/rejected": 0.7708415985107422, "logps/chosen": -2.3412270545959473, "logps/rejected": -4.285251617431641, "loss": 0.453, "nll_loss": 0.43213507533073425, "rewards/accuracies": 1.0, "rewards/chosen": -0.23412269353866577, "rewards/margins": 0.19440248608589172, "rewards/rejected": -0.4285252094268799, "step": 5611 }, { "epoch": 15.364818617385353, "grad_norm": 2.9993135929107666, "learning_rate": 2.3123287671232873e-07, "log_odds_chosen": 3.530989646911621, "log_odds_ratio": -0.10310148447751999, "logits/chosen": 1.212332010269165, "logits/rejected": 1.1355973482131958, "logps/chosen": -1.4097826480865479, "logps/rejected": -4.628726959228516, "loss": 0.4138, "nll_loss": 0.4035184681415558, "rewards/accuracies": 1.0, "rewards/chosen": -0.14097827672958374, "rewards/margins": 0.32189446687698364, "rewards/rejected": -0.4628727436065674, "step": 5612 }, { "epoch": 15.367556468172484, "grad_norm": 3.8737077713012695, "learning_rate": 2.3109589041095888e-07, "log_odds_chosen": 2.707108497619629, "log_odds_ratio": -0.41595765948295593, "logits/chosen": 0.9954022169113159, "logits/rejected": 0.9473421573638916, "logps/chosen": -1.9803965091705322, "logps/rejected": -4.581865310668945, "loss": 0.5252, "nll_loss": 0.4835774302482605, "rewards/accuracies": 1.0, "rewards/chosen": -0.19803966581821442, "rewards/margins": 0.2601468861103058, "rewards/rejected": -0.458186537027359, "step": 5613 }, { "epoch": 15.370294318959617, "grad_norm": 3.470193386077881, "learning_rate": 2.3095890410958904e-07, "log_odds_chosen": 2.54144549369812, "log_odds_ratio": -0.2544909715652466, "logits/chosen": 1.2194701433181763, "logits/rejected": 1.2455809116363525, "logps/chosen": -1.9999618530273438, "logps/rejected": -4.420360565185547, "loss": 0.5712, "nll_loss": 0.5457067489624023, "rewards/accuracies": 0.875, "rewards/chosen": -0.1999961882829666, "rewards/margins": 0.24203988909721375, "rewards/rejected": -0.4420360326766968, "step": 5614 }, { "epoch": 15.373032169746748, "grad_norm": 3.7384965419769287, "learning_rate": 2.3082191780821916e-07, "log_odds_chosen": 3.0608901977539062, "log_odds_ratio": -0.2508396506309509, "logits/chosen": 1.2033692598342896, "logits/rejected": 1.1941134929656982, "logps/chosen": -2.0091543197631836, "logps/rejected": -4.894942283630371, "loss": 0.6174, "nll_loss": 0.5923130512237549, "rewards/accuracies": 0.875, "rewards/chosen": -0.20091545581817627, "rewards/margins": 0.2885788381099701, "rewards/rejected": -0.489494264125824, "step": 5615 }, { "epoch": 15.375770020533881, "grad_norm": 3.6112823486328125, "learning_rate": 2.3068493150684931e-07, "log_odds_chosen": 1.7332314252853394, "log_odds_ratio": -0.28476274013519287, "logits/chosen": 1.4324979782104492, "logits/rejected": 1.4561638832092285, "logps/chosen": -1.381815791130066, "logps/rejected": -2.8878421783447266, "loss": 0.4632, "nll_loss": 0.4347487688064575, "rewards/accuracies": 0.875, "rewards/chosen": -0.13818158209323883, "rewards/margins": 0.15060263872146606, "rewards/rejected": -0.2887842059135437, "step": 5616 }, { "epoch": 15.378507871321013, "grad_norm": 3.378859758377075, "learning_rate": 2.3054794520547944e-07, "log_odds_chosen": 3.149494171142578, "log_odds_ratio": -0.07709315419197083, "logits/chosen": 1.2151577472686768, "logits/rejected": 1.2322056293487549, "logps/chosen": -1.4795072078704834, "logps/rejected": -4.271084785461426, "loss": 0.5164, "nll_loss": 0.5086462497711182, "rewards/accuracies": 1.0, "rewards/chosen": -0.14795070886611938, "rewards/margins": 0.27915775775909424, "rewards/rejected": -0.4271084666252136, "step": 5617 }, { "epoch": 15.381245722108146, "grad_norm": 3.6751599311828613, "learning_rate": 2.304109589041096e-07, "log_odds_chosen": 2.3067970275878906, "log_odds_ratio": -0.15046991407871246, "logits/chosen": 1.1463435888290405, "logits/rejected": 1.0075757503509521, "logps/chosen": -1.2934281826019287, "logps/rejected": -3.3349344730377197, "loss": 0.3986, "nll_loss": 0.38353636860847473, "rewards/accuracies": 1.0, "rewards/chosen": -0.12934280931949615, "rewards/margins": 0.20415063202381134, "rewards/rejected": -0.3334934711456299, "step": 5618 }, { "epoch": 15.383983572895277, "grad_norm": 4.439798355102539, "learning_rate": 2.302739726027397e-07, "log_odds_chosen": 2.961324453353882, "log_odds_ratio": -0.19848018884658813, "logits/chosen": 1.0392628908157349, "logits/rejected": 1.0507630109786987, "logps/chosen": -2.361936569213867, "logps/rejected": -5.2275071144104, "loss": 0.6366, "nll_loss": 0.6167806386947632, "rewards/accuracies": 0.875, "rewards/chosen": -0.23619365692138672, "rewards/margins": 0.28655704855918884, "rewards/rejected": -0.522750735282898, "step": 5619 }, { "epoch": 15.38672142368241, "grad_norm": 3.418449878692627, "learning_rate": 2.3013698630136984e-07, "log_odds_chosen": 2.808237075805664, "log_odds_ratio": -0.15552595257759094, "logits/chosen": 1.360256314277649, "logits/rejected": 1.3837891817092896, "logps/chosen": -1.7516714334487915, "logps/rejected": -4.375214576721191, "loss": 0.554, "nll_loss": 0.5384209752082825, "rewards/accuracies": 1.0, "rewards/chosen": -0.17516714334487915, "rewards/margins": 0.2623543441295624, "rewards/rejected": -0.43752148747444153, "step": 5620 }, { "epoch": 15.38945927446954, "grad_norm": 4.062816143035889, "learning_rate": 2.3e-07, "log_odds_chosen": 1.3700392246246338, "log_odds_ratio": -0.4140276312828064, "logits/chosen": 0.9437958002090454, "logits/rejected": 0.8258242607116699, "logps/chosen": -1.722938060760498, "logps/rejected": -2.81673002243042, "loss": 0.5708, "nll_loss": 0.5294283032417297, "rewards/accuracies": 0.75, "rewards/chosen": -0.1722937971353531, "rewards/margins": 0.10937919467687607, "rewards/rejected": -0.28167301416397095, "step": 5621 }, { "epoch": 15.392197125256674, "grad_norm": 3.82828950881958, "learning_rate": 2.2986301369863012e-07, "log_odds_chosen": 3.4536445140838623, "log_odds_ratio": -0.09128853678703308, "logits/chosen": 1.03532075881958, "logits/rejected": 1.0081464052200317, "logps/chosen": -2.321345329284668, "logps/rejected": -5.615431785583496, "loss": 0.6273, "nll_loss": 0.6181349158287048, "rewards/accuracies": 1.0, "rewards/chosen": -0.23213455080986023, "rewards/margins": 0.3294086456298828, "rewards/rejected": -0.5615431666374207, "step": 5622 }, { "epoch": 15.394934976043805, "grad_norm": 3.423306703567505, "learning_rate": 2.2972602739726027e-07, "log_odds_chosen": 4.458354949951172, "log_odds_ratio": -0.17482292652130127, "logits/chosen": 1.23508620262146, "logits/rejected": 1.2616733312606812, "logps/chosen": -2.0356948375701904, "logps/rejected": -6.346563339233398, "loss": 0.5238, "nll_loss": 0.5063263773918152, "rewards/accuracies": 1.0, "rewards/chosen": -0.20356950163841248, "rewards/margins": 0.43108683824539185, "rewards/rejected": -0.6346563696861267, "step": 5623 }, { "epoch": 15.397672826830938, "grad_norm": 3.801607131958008, "learning_rate": 2.295890410958904e-07, "log_odds_chosen": 2.8677852153778076, "log_odds_ratio": -0.16938260197639465, "logits/chosen": 1.028545618057251, "logits/rejected": 1.043467402458191, "logps/chosen": -1.9866756200790405, "logps/rejected": -4.6884765625, "loss": 0.746, "nll_loss": 0.7290487885475159, "rewards/accuracies": 1.0, "rewards/chosen": -0.19866758584976196, "rewards/margins": 0.2701801061630249, "rewards/rejected": -0.46884769201278687, "step": 5624 }, { "epoch": 15.40041067761807, "grad_norm": 4.044841766357422, "learning_rate": 2.2945205479452055e-07, "log_odds_chosen": 2.7521166801452637, "log_odds_ratio": -0.2751418650150299, "logits/chosen": 1.1812666654586792, "logits/rejected": 1.2182809114456177, "logps/chosen": -1.7651056051254272, "logps/rejected": -4.318634986877441, "loss": 0.4332, "nll_loss": 0.4056891202926636, "rewards/accuracies": 0.875, "rewards/chosen": -0.17651057243347168, "rewards/margins": 0.2553529143333435, "rewards/rejected": -0.4318634867668152, "step": 5625 }, { "epoch": 15.403148528405202, "grad_norm": 3.8023617267608643, "learning_rate": 2.2931506849315068e-07, "log_odds_chosen": 2.701756477355957, "log_odds_ratio": -0.2056995928287506, "logits/chosen": 1.0133306980133057, "logits/rejected": 1.0242397785186768, "logps/chosen": -1.6559233665466309, "logps/rejected": -4.206464767456055, "loss": 0.4896, "nll_loss": 0.4689934551715851, "rewards/accuracies": 1.0, "rewards/chosen": -0.16559234261512756, "rewards/margins": 0.25505417585372925, "rewards/rejected": -0.4206465184688568, "step": 5626 }, { "epoch": 15.405886379192333, "grad_norm": 3.0804171562194824, "learning_rate": 2.291780821917808e-07, "log_odds_chosen": 5.003079891204834, "log_odds_ratio": -0.0990653857588768, "logits/chosen": 1.248610496520996, "logits/rejected": 1.1923805475234985, "logps/chosen": -1.7684082984924316, "logps/rejected": -6.579102993011475, "loss": 0.5555, "nll_loss": 0.5456291437149048, "rewards/accuracies": 1.0, "rewards/chosen": -0.17684082686901093, "rewards/margins": 0.4810694456100464, "rewards/rejected": -0.6579102873802185, "step": 5627 }, { "epoch": 15.408624229979466, "grad_norm": 3.6612184047698975, "learning_rate": 2.2904109589041095e-07, "log_odds_chosen": 2.4208149909973145, "log_odds_ratio": -0.13133658468723297, "logits/chosen": 0.9584605693817139, "logits/rejected": 0.9295743703842163, "logps/chosen": -1.522773265838623, "logps/rejected": -3.7069926261901855, "loss": 0.4428, "nll_loss": 0.42968887090682983, "rewards/accuracies": 1.0, "rewards/chosen": -0.15227732062339783, "rewards/margins": 0.21842193603515625, "rewards/rejected": -0.3706992566585541, "step": 5628 }, { "epoch": 15.411362080766597, "grad_norm": 8.047053337097168, "learning_rate": 2.2890410958904108e-07, "log_odds_chosen": 2.1521522998809814, "log_odds_ratio": -0.26461565494537354, "logits/chosen": 1.197570562362671, "logits/rejected": 1.196960210800171, "logps/chosen": -2.358973979949951, "logps/rejected": -4.346405029296875, "loss": 0.5455, "nll_loss": 0.5190526843070984, "rewards/accuracies": 0.875, "rewards/chosen": -0.23589739203453064, "rewards/margins": 0.1987430900335312, "rewards/rejected": -0.434640496969223, "step": 5629 }, { "epoch": 15.41409993155373, "grad_norm": 4.087558269500732, "learning_rate": 2.2876712328767123e-07, "log_odds_chosen": 3.7992517948150635, "log_odds_ratio": -0.0756392627954483, "logits/chosen": 1.1219345331192017, "logits/rejected": 1.1516075134277344, "logps/chosen": -1.5330144166946411, "logps/rejected": -4.950973033905029, "loss": 0.6081, "nll_loss": 0.6005405783653259, "rewards/accuracies": 1.0, "rewards/chosen": -0.1533014476299286, "rewards/margins": 0.34179583191871643, "rewards/rejected": -0.495097279548645, "step": 5630 }, { "epoch": 15.416837782340863, "grad_norm": 4.13326358795166, "learning_rate": 2.2863013698630136e-07, "log_odds_chosen": 2.1782066822052, "log_odds_ratio": -0.22553430497646332, "logits/chosen": 1.1067984104156494, "logits/rejected": 1.0750420093536377, "logps/chosen": -1.1987769603729248, "logps/rejected": -3.100531578063965, "loss": 0.4144, "nll_loss": 0.3918747007846832, "rewards/accuracies": 1.0, "rewards/chosen": -0.11987769603729248, "rewards/margins": 0.19017545878887177, "rewards/rejected": -0.31005316972732544, "step": 5631 }, { "epoch": 15.419575633127995, "grad_norm": 4.93305778503418, "learning_rate": 2.284931506849315e-07, "log_odds_chosen": 3.632457971572876, "log_odds_ratio": -0.24923554062843323, "logits/chosen": 1.098310112953186, "logits/rejected": 1.0692135095596313, "logps/chosen": -2.313500165939331, "logps/rejected": -5.7880167961120605, "loss": 0.5811, "nll_loss": 0.5561864376068115, "rewards/accuracies": 0.875, "rewards/chosen": -0.23135001957416534, "rewards/margins": 0.34745168685913086, "rewards/rejected": -0.578801691532135, "step": 5632 }, { "epoch": 15.422313483915127, "grad_norm": 6.220438480377197, "learning_rate": 2.2835616438356163e-07, "log_odds_chosen": 1.433241605758667, "log_odds_ratio": -0.4916020333766937, "logits/chosen": 1.339550256729126, "logits/rejected": 1.2769559621810913, "logps/chosen": -1.6218364238739014, "logps/rejected": -2.923880100250244, "loss": 0.5269, "nll_loss": 0.4777805805206299, "rewards/accuracies": 0.875, "rewards/chosen": -0.16218364238739014, "rewards/margins": 0.13020437955856323, "rewards/rejected": -0.29238802194595337, "step": 5633 }, { "epoch": 15.425051334702259, "grad_norm": 3.5766706466674805, "learning_rate": 2.2821917808219176e-07, "log_odds_chosen": 2.2779898643493652, "log_odds_ratio": -0.19116193056106567, "logits/chosen": 1.0806902647018433, "logits/rejected": 0.9640077352523804, "logps/chosen": -1.4138410091400146, "logps/rejected": -3.369680404663086, "loss": 0.4652, "nll_loss": 0.446130633354187, "rewards/accuracies": 1.0, "rewards/chosen": -0.14138410985469818, "rewards/margins": 0.19558392465114594, "rewards/rejected": -0.3369680345058441, "step": 5634 }, { "epoch": 15.427789185489392, "grad_norm": 3.7821731567382812, "learning_rate": 2.280821917808219e-07, "log_odds_chosen": 3.927067995071411, "log_odds_ratio": -0.16128003597259521, "logits/chosen": 1.0935677289962769, "logits/rejected": 1.0647988319396973, "logps/chosen": -1.5497369766235352, "logps/rejected": -5.242123126983643, "loss": 0.6048, "nll_loss": 0.5886867046356201, "rewards/accuracies": 1.0, "rewards/chosen": -0.15497370064258575, "rewards/margins": 0.36923861503601074, "rewards/rejected": -0.5242123007774353, "step": 5635 }, { "epoch": 15.430527036276523, "grad_norm": 3.6757278442382812, "learning_rate": 2.2794520547945204e-07, "log_odds_chosen": 4.595441818237305, "log_odds_ratio": -0.11170797795057297, "logits/chosen": 1.3777532577514648, "logits/rejected": 1.4419128894805908, "logps/chosen": -1.8519459962844849, "logps/rejected": -6.254521369934082, "loss": 0.5326, "nll_loss": 0.5214297771453857, "rewards/accuracies": 0.875, "rewards/chosen": -0.18519461154937744, "rewards/margins": 0.44025757908821106, "rewards/rejected": -0.6254522204399109, "step": 5636 }, { "epoch": 15.433264887063656, "grad_norm": 3.8892300128936768, "learning_rate": 2.278082191780822e-07, "log_odds_chosen": 2.5142335891723633, "log_odds_ratio": -0.219283327460289, "logits/chosen": 1.1220934391021729, "logits/rejected": 1.054257869720459, "logps/chosen": -1.715977430343628, "logps/rejected": -4.003697395324707, "loss": 0.5362, "nll_loss": 0.5143069624900818, "rewards/accuracies": 1.0, "rewards/chosen": -0.17159774899482727, "rewards/margins": 0.22877201437950134, "rewards/rejected": -0.4003697633743286, "step": 5637 }, { "epoch": 15.436002737850787, "grad_norm": 6.87783145904541, "learning_rate": 2.2767123287671232e-07, "log_odds_chosen": 1.8263850212097168, "log_odds_ratio": -0.27153947949409485, "logits/chosen": 1.061040997505188, "logits/rejected": 1.066739559173584, "logps/chosen": -2.009310483932495, "logps/rejected": -3.704901933670044, "loss": 0.568, "nll_loss": 0.5408560633659363, "rewards/accuracies": 1.0, "rewards/chosen": -0.20093104243278503, "rewards/margins": 0.16955915093421936, "rewards/rejected": -0.3704901933670044, "step": 5638 }, { "epoch": 15.43874058863792, "grad_norm": 6.6377105712890625, "learning_rate": 2.2753424657534244e-07, "log_odds_chosen": 1.9444234371185303, "log_odds_ratio": -0.23555423319339752, "logits/chosen": 1.0834710597991943, "logits/rejected": 0.9723292589187622, "logps/chosen": -1.6321756839752197, "logps/rejected": -3.3492462635040283, "loss": 0.5239, "nll_loss": 0.5003817081451416, "rewards/accuracies": 1.0, "rewards/chosen": -0.16321757435798645, "rewards/margins": 0.17170706391334534, "rewards/rejected": -0.3349246382713318, "step": 5639 }, { "epoch": 15.441478439425051, "grad_norm": 5.870603084564209, "learning_rate": 2.273972602739726e-07, "log_odds_chosen": 2.877242088317871, "log_odds_ratio": -0.4844464659690857, "logits/chosen": 1.1226075887680054, "logits/rejected": 1.0693448781967163, "logps/chosen": -2.2266547679901123, "logps/rejected": -4.9954423904418945, "loss": 0.6721, "nll_loss": 0.6236473321914673, "rewards/accuracies": 0.75, "rewards/chosen": -0.2226654589176178, "rewards/margins": 0.27687883377075195, "rewards/rejected": -0.49954429268836975, "step": 5640 }, { "epoch": 15.444216290212184, "grad_norm": 5.561798095703125, "learning_rate": 2.2726027397260272e-07, "log_odds_chosen": 1.5998573303222656, "log_odds_ratio": -0.25796541571617126, "logits/chosen": 0.7528673410415649, "logits/rejected": 0.7086248397827148, "logps/chosen": -1.9452855587005615, "logps/rejected": -3.3331363201141357, "loss": 0.527, "nll_loss": 0.5011901259422302, "rewards/accuracies": 0.875, "rewards/chosen": -0.19452854990959167, "rewards/margins": 0.13878507912158966, "rewards/rejected": -0.33331364393234253, "step": 5641 }, { "epoch": 15.446954140999315, "grad_norm": 3.690819501876831, "learning_rate": 2.2712328767123287e-07, "log_odds_chosen": 2.834123134613037, "log_odds_ratio": -0.18200209736824036, "logits/chosen": 1.1628094911575317, "logits/rejected": 1.138603925704956, "logps/chosen": -1.7488373517990112, "logps/rejected": -4.399703502655029, "loss": 0.575, "nll_loss": 0.5568066239356995, "rewards/accuracies": 1.0, "rewards/chosen": -0.17488375306129456, "rewards/margins": 0.2650866210460663, "rewards/rejected": -0.43997034430503845, "step": 5642 }, { "epoch": 15.449691991786448, "grad_norm": 3.894845962524414, "learning_rate": 2.26986301369863e-07, "log_odds_chosen": 3.1607985496520996, "log_odds_ratio": -0.34805312752723694, "logits/chosen": 1.3138858079910278, "logits/rejected": 1.3282983303070068, "logps/chosen": -1.7588139772415161, "logps/rejected": -4.682431221008301, "loss": 0.5133, "nll_loss": 0.4785219430923462, "rewards/accuracies": 0.875, "rewards/chosen": -0.17588140070438385, "rewards/margins": 0.2923617362976074, "rewards/rejected": -0.4682431221008301, "step": 5643 }, { "epoch": 15.45242984257358, "grad_norm": 8.09626579284668, "learning_rate": 2.2684931506849315e-07, "log_odds_chosen": 1.1463391780853271, "log_odds_ratio": -1.0203641653060913, "logits/chosen": 1.0018904209136963, "logits/rejected": 0.9287288188934326, "logps/chosen": -2.9043898582458496, "logps/rejected": -3.9633352756500244, "loss": 0.702, "nll_loss": 0.5999925136566162, "rewards/accuracies": 0.625, "rewards/chosen": -0.2904389798641205, "rewards/margins": 0.1058945506811142, "rewards/rejected": -0.3963335156440735, "step": 5644 }, { "epoch": 15.455167693360712, "grad_norm": 4.500693321228027, "learning_rate": 2.267123287671233e-07, "log_odds_chosen": 3.479454278945923, "log_odds_ratio": -0.3542884588241577, "logits/chosen": 1.180448055267334, "logits/rejected": 1.0946290493011475, "logps/chosen": -2.0164904594421387, "logps/rejected": -5.305430889129639, "loss": 0.613, "nll_loss": 0.577618420124054, "rewards/accuracies": 0.75, "rewards/chosen": -0.20164906978607178, "rewards/margins": 0.3288940489292145, "rewards/rejected": -0.5305430889129639, "step": 5645 }, { "epoch": 15.457905544147843, "grad_norm": 3.4344308376312256, "learning_rate": 2.265753424657534e-07, "log_odds_chosen": 2.7528128623962402, "log_odds_ratio": -0.1978926658630371, "logits/chosen": 1.0856516361236572, "logits/rejected": 1.0055224895477295, "logps/chosen": -1.4743082523345947, "logps/rejected": -4.033797740936279, "loss": 0.4826, "nll_loss": 0.4628385305404663, "rewards/accuracies": 1.0, "rewards/chosen": -0.14743082225322723, "rewards/margins": 0.2559489607810974, "rewards/rejected": -0.40337979793548584, "step": 5646 }, { "epoch": 15.460643394934976, "grad_norm": 4.847615718841553, "learning_rate": 2.2643835616438355e-07, "log_odds_chosen": 3.562903881072998, "log_odds_ratio": -0.38386306166648865, "logits/chosen": 0.9841808676719666, "logits/rejected": 1.0898836851119995, "logps/chosen": -2.0756149291992188, "logps/rejected": -5.391746997833252, "loss": 0.6266, "nll_loss": 0.588254988193512, "rewards/accuracies": 0.875, "rewards/chosen": -0.20756150782108307, "rewards/margins": 0.3316131830215454, "rewards/rejected": -0.5391746759414673, "step": 5647 }, { "epoch": 15.463381245722108, "grad_norm": 4.012539863586426, "learning_rate": 2.2630136986301368e-07, "log_odds_chosen": 2.329636335372925, "log_odds_ratio": -0.1742229461669922, "logits/chosen": 1.3401917219161987, "logits/rejected": 1.3294318914413452, "logps/chosen": -1.7141356468200684, "logps/rejected": -3.8280446529388428, "loss": 0.4618, "nll_loss": 0.444424569606781, "rewards/accuracies": 1.0, "rewards/chosen": -0.1714135706424713, "rewards/margins": 0.2113909274339676, "rewards/rejected": -0.3828045129776001, "step": 5648 }, { "epoch": 15.46611909650924, "grad_norm": 3.5600388050079346, "learning_rate": 2.2616438356164383e-07, "log_odds_chosen": 2.360240936279297, "log_odds_ratio": -0.1673947274684906, "logits/chosen": 0.8224457502365112, "logits/rejected": 0.7488546371459961, "logps/chosen": -1.7854642868041992, "logps/rejected": -3.976698875427246, "loss": 0.4835, "nll_loss": 0.4667837619781494, "rewards/accuracies": 1.0, "rewards/chosen": -0.17854642868041992, "rewards/margins": 0.2191234827041626, "rewards/rejected": -0.3976699113845825, "step": 5649 }, { "epoch": 15.468856947296372, "grad_norm": 4.719899654388428, "learning_rate": 2.2602739726027396e-07, "log_odds_chosen": 1.636092185974121, "log_odds_ratio": -0.23958928883075714, "logits/chosen": 1.178173303604126, "logits/rejected": 1.1597955226898193, "logps/chosen": -1.7635726928710938, "logps/rejected": -3.196439266204834, "loss": 0.5466, "nll_loss": 0.5226535797119141, "rewards/accuracies": 1.0, "rewards/chosen": -0.17635726928710938, "rewards/margins": 0.14328666031360626, "rewards/rejected": -0.31964391469955444, "step": 5650 }, { "epoch": 15.471594798083505, "grad_norm": 3.7038421630859375, "learning_rate": 2.258904109589041e-07, "log_odds_chosen": 3.7210311889648438, "log_odds_ratio": -0.17552030086517334, "logits/chosen": 1.0049136877059937, "logits/rejected": 1.0676593780517578, "logps/chosen": -1.625534176826477, "logps/rejected": -5.0902299880981445, "loss": 0.4417, "nll_loss": 0.42418172955513, "rewards/accuracies": 0.875, "rewards/chosen": -0.16255342960357666, "rewards/margins": 0.34646958112716675, "rewards/rejected": -0.5090230107307434, "step": 5651 }, { "epoch": 15.474332648870636, "grad_norm": 4.702928066253662, "learning_rate": 2.2575342465753426e-07, "log_odds_chosen": 2.3775219917297363, "log_odds_ratio": -0.19845549762248993, "logits/chosen": 0.8239699602127075, "logits/rejected": 0.8008310794830322, "logps/chosen": -1.660762071609497, "logps/rejected": -3.795015811920166, "loss": 0.4295, "nll_loss": 0.409626841545105, "rewards/accuracies": 1.0, "rewards/chosen": -0.16607621312141418, "rewards/margins": 0.21342533826828003, "rewards/rejected": -0.3795015513896942, "step": 5652 }, { "epoch": 15.477070499657769, "grad_norm": 3.5744571685791016, "learning_rate": 2.2561643835616436e-07, "log_odds_chosen": 3.00968337059021, "log_odds_ratio": -0.2082711160182953, "logits/chosen": 1.0625276565551758, "logits/rejected": 1.110851764678955, "logps/chosen": -2.014902114868164, "logps/rejected": -4.885396957397461, "loss": 0.6063, "nll_loss": 0.5854922533035278, "rewards/accuracies": 1.0, "rewards/chosen": -0.20149022340774536, "rewards/margins": 0.2870495021343231, "rewards/rejected": -0.4885397255420685, "step": 5653 }, { "epoch": 15.4798083504449, "grad_norm": 3.6064505577087402, "learning_rate": 2.254794520547945e-07, "log_odds_chosen": 2.6041393280029297, "log_odds_ratio": -0.20159855484962463, "logits/chosen": 1.1915581226348877, "logits/rejected": 1.2232997417449951, "logps/chosen": -1.9602116346359253, "logps/rejected": -4.407399654388428, "loss": 0.5131, "nll_loss": 0.492928147315979, "rewards/accuracies": 1.0, "rewards/chosen": -0.196021169424057, "rewards/margins": 0.2447187900543213, "rewards/rejected": -0.4407399892807007, "step": 5654 }, { "epoch": 15.482546201232033, "grad_norm": 5.397676944732666, "learning_rate": 2.2534246575342464e-07, "log_odds_chosen": 2.9872870445251465, "log_odds_ratio": -0.19596552848815918, "logits/chosen": 0.8486611843109131, "logits/rejected": 0.7852773666381836, "logps/chosen": -2.157550096511841, "logps/rejected": -4.928258419036865, "loss": 0.6081, "nll_loss": 0.5885310769081116, "rewards/accuracies": 1.0, "rewards/chosen": -0.21575501561164856, "rewards/margins": 0.2770708203315735, "rewards/rejected": -0.49282583594322205, "step": 5655 }, { "epoch": 15.485284052019164, "grad_norm": 3.3297336101531982, "learning_rate": 2.252054794520548e-07, "log_odds_chosen": 2.818307399749756, "log_odds_ratio": -0.12914294004440308, "logits/chosen": 1.0698907375335693, "logits/rejected": 1.0904932022094727, "logps/chosen": -1.7436609268188477, "logps/rejected": -4.313662528991699, "loss": 0.4808, "nll_loss": 0.4679229259490967, "rewards/accuracies": 1.0, "rewards/chosen": -0.1743660867214203, "rewards/margins": 0.2570001780986786, "rewards/rejected": -0.4313662350177765, "step": 5656 }, { "epoch": 15.488021902806297, "grad_norm": 4.355350494384766, "learning_rate": 2.2506849315068494e-07, "log_odds_chosen": 1.6816644668579102, "log_odds_ratio": -0.34565845131874084, "logits/chosen": 1.3111544847488403, "logits/rejected": 1.2874315977096558, "logps/chosen": -1.8200924396514893, "logps/rejected": -3.3459033966064453, "loss": 0.5793, "nll_loss": 0.5447438955307007, "rewards/accuracies": 0.875, "rewards/chosen": -0.1820092499256134, "rewards/margins": 0.1525810956954956, "rewards/rejected": -0.334590345621109, "step": 5657 }, { "epoch": 15.49075975359343, "grad_norm": 3.5763208866119385, "learning_rate": 2.2493150684931507e-07, "log_odds_chosen": 4.03645133972168, "log_odds_ratio": -0.04461097717285156, "logits/chosen": 1.006751537322998, "logits/rejected": 1.0204395055770874, "logps/chosen": -2.207653522491455, "logps/rejected": -6.068015098571777, "loss": 0.6172, "nll_loss": 0.6127148270606995, "rewards/accuracies": 1.0, "rewards/chosen": -0.22076532244682312, "rewards/margins": 0.3860362470149994, "rewards/rejected": -0.6068015694618225, "step": 5658 }, { "epoch": 15.493497604380561, "grad_norm": 4.7866597175598145, "learning_rate": 2.247945205479452e-07, "log_odds_chosen": 1.595743179321289, "log_odds_ratio": -0.3210957944393158, "logits/chosen": 1.3636581897735596, "logits/rejected": 1.319491982460022, "logps/chosen": -1.659001350402832, "logps/rejected": -3.063676595687866, "loss": 0.4503, "nll_loss": 0.4182285964488983, "rewards/accuracies": 0.875, "rewards/chosen": -0.1659001260995865, "rewards/margins": 0.14046750962734222, "rewards/rejected": -0.3063676655292511, "step": 5659 }, { "epoch": 15.496235455167694, "grad_norm": 3.398153781890869, "learning_rate": 2.2465753424657532e-07, "log_odds_chosen": 2.097043037414551, "log_odds_ratio": -0.2101384550333023, "logits/chosen": 1.125724196434021, "logits/rejected": 1.104966640472412, "logps/chosen": -1.6759411096572876, "logps/rejected": -3.521111488342285, "loss": 0.4219, "nll_loss": 0.4008805751800537, "rewards/accuracies": 1.0, "rewards/chosen": -0.16759411990642548, "rewards/margins": 0.1845170557498932, "rewards/rejected": -0.35211116075515747, "step": 5660 }, { "epoch": 15.498973305954825, "grad_norm": 4.886146068572998, "learning_rate": 2.2452054794520547e-07, "log_odds_chosen": 3.5130178928375244, "log_odds_ratio": -0.21092431247234344, "logits/chosen": 1.3787435293197632, "logits/rejected": 1.378179669380188, "logps/chosen": -2.438138961791992, "logps/rejected": -5.84356689453125, "loss": 0.6719, "nll_loss": 0.6508207321166992, "rewards/accuracies": 0.875, "rewards/chosen": -0.2438139021396637, "rewards/margins": 0.34054282307624817, "rewards/rejected": -0.5843567252159119, "step": 5661 }, { "epoch": 15.501711156741958, "grad_norm": 5.964033126831055, "learning_rate": 2.243835616438356e-07, "log_odds_chosen": 2.7539219856262207, "log_odds_ratio": -0.11957297474145889, "logits/chosen": 1.1848223209381104, "logits/rejected": 1.1100722551345825, "logps/chosen": -1.6170940399169922, "logps/rejected": -3.9468414783477783, "loss": 0.5044, "nll_loss": 0.4924224615097046, "rewards/accuracies": 1.0, "rewards/chosen": -0.16170941293239594, "rewards/margins": 0.23297475278377533, "rewards/rejected": -0.39468416571617126, "step": 5662 }, { "epoch": 15.50444900752909, "grad_norm": 6.405371189117432, "learning_rate": 2.2424657534246575e-07, "log_odds_chosen": 2.5004405975341797, "log_odds_ratio": -0.3767031729221344, "logits/chosen": 0.9945095181465149, "logits/rejected": 0.986564576625824, "logps/chosen": -2.6919775009155273, "logps/rejected": -5.1063079833984375, "loss": 0.6308, "nll_loss": 0.5930891633033752, "rewards/accuracies": 0.875, "rewards/chosen": -0.2691977620124817, "rewards/margins": 0.2414330691099167, "rewards/rejected": -0.5106307864189148, "step": 5663 }, { "epoch": 15.507186858316222, "grad_norm": 3.9608418941497803, "learning_rate": 2.241095890410959e-07, "log_odds_chosen": 1.6350417137145996, "log_odds_ratio": -0.2255098819732666, "logits/chosen": 1.4090826511383057, "logits/rejected": 1.3849736452102661, "logps/chosen": -1.5132496356964111, "logps/rejected": -2.942416191101074, "loss": 0.4183, "nll_loss": 0.3957439661026001, "rewards/accuracies": 1.0, "rewards/chosen": -0.15132495760917664, "rewards/margins": 0.14291664958000183, "rewards/rejected": -0.29424160718917847, "step": 5664 }, { "epoch": 15.509924709103354, "grad_norm": 4.319692611694336, "learning_rate": 2.2397260273972602e-07, "log_odds_chosen": 2.805455207824707, "log_odds_ratio": -0.36927324533462524, "logits/chosen": 0.8503671884536743, "logits/rejected": 0.8749744892120361, "logps/chosen": -2.468177080154419, "logps/rejected": -5.137657642364502, "loss": 0.6545, "nll_loss": 0.6175640821456909, "rewards/accuracies": 0.875, "rewards/chosen": -0.2468177229166031, "rewards/margins": 0.26694804430007935, "rewards/rejected": -0.513765811920166, "step": 5665 }, { "epoch": 15.512662559890487, "grad_norm": 3.841055154800415, "learning_rate": 2.2383561643835615e-07, "log_odds_chosen": 3.4383888244628906, "log_odds_ratio": -0.18115320801734924, "logits/chosen": 1.1287894248962402, "logits/rejected": 1.1783872842788696, "logps/chosen": -2.2398078441619873, "logps/rejected": -5.514599323272705, "loss": 0.6621, "nll_loss": 0.6439985632896423, "rewards/accuracies": 1.0, "rewards/chosen": -0.22398078441619873, "rewards/margins": 0.32747912406921387, "rewards/rejected": -0.5514599084854126, "step": 5666 }, { "epoch": 15.515400410677618, "grad_norm": 3.895993947982788, "learning_rate": 2.2369863013698628e-07, "log_odds_chosen": 3.891679525375366, "log_odds_ratio": -0.12870435416698456, "logits/chosen": 1.0573453903198242, "logits/rejected": 1.0564957857131958, "logps/chosen": -2.6193325519561768, "logps/rejected": -6.357147216796875, "loss": 0.6158, "nll_loss": 0.6029173135757446, "rewards/accuracies": 1.0, "rewards/chosen": -0.26193326711654663, "rewards/margins": 0.3737815320491791, "rewards/rejected": -0.6357147693634033, "step": 5667 }, { "epoch": 15.51813826146475, "grad_norm": 4.115665435791016, "learning_rate": 2.2356164383561643e-07, "log_odds_chosen": 4.584416389465332, "log_odds_ratio": -0.10719886422157288, "logits/chosen": 1.1722207069396973, "logits/rejected": 1.1836315393447876, "logps/chosen": -2.265958786010742, "logps/rejected": -6.73011589050293, "loss": 0.5651, "nll_loss": 0.5544027090072632, "rewards/accuracies": 1.0, "rewards/chosen": -0.22659587860107422, "rewards/margins": 0.4464157819747925, "rewards/rejected": -0.6730116009712219, "step": 5668 }, { "epoch": 15.520876112251882, "grad_norm": 3.9782555103302, "learning_rate": 2.2342465753424655e-07, "log_odds_chosen": 1.7064955234527588, "log_odds_ratio": -0.315428763628006, "logits/chosen": 0.8976799249649048, "logits/rejected": 0.793836772441864, "logps/chosen": -1.5596829652786255, "logps/rejected": -3.1184446811676025, "loss": 0.468, "nll_loss": 0.4364791512489319, "rewards/accuracies": 0.875, "rewards/chosen": -0.1559682935476303, "rewards/margins": 0.15587617456912994, "rewards/rejected": -0.31184443831443787, "step": 5669 }, { "epoch": 15.523613963039015, "grad_norm": 3.7816338539123535, "learning_rate": 2.232876712328767e-07, "log_odds_chosen": 4.610114097595215, "log_odds_ratio": -0.17866253852844238, "logits/chosen": 1.2528142929077148, "logits/rejected": 1.3115609884262085, "logps/chosen": -2.1151692867279053, "logps/rejected": -6.51607084274292, "loss": 0.684, "nll_loss": 0.666106104850769, "rewards/accuracies": 1.0, "rewards/chosen": -0.21151691675186157, "rewards/margins": 0.440090149641037, "rewards/rejected": -0.651607096195221, "step": 5670 }, { "epoch": 15.526351813826146, "grad_norm": 3.9595115184783936, "learning_rate": 2.2315068493150686e-07, "log_odds_chosen": 3.6786248683929443, "log_odds_ratio": -0.22225132584571838, "logits/chosen": 1.0134453773498535, "logits/rejected": 1.074066400527954, "logps/chosen": -2.197509765625, "logps/rejected": -5.6459455490112305, "loss": 0.6394, "nll_loss": 0.6172118186950684, "rewards/accuracies": 0.875, "rewards/chosen": -0.21975098550319672, "rewards/margins": 0.34484362602233887, "rewards/rejected": -0.5645946264266968, "step": 5671 }, { "epoch": 15.529089664613279, "grad_norm": 9.058950424194336, "learning_rate": 2.2301369863013698e-07, "log_odds_chosen": 3.566988468170166, "log_odds_ratio": -0.168455570936203, "logits/chosen": 1.3712000846862793, "logits/rejected": 1.2694859504699707, "logps/chosen": -2.0900702476501465, "logps/rejected": -5.4819231033325195, "loss": 0.5809, "nll_loss": 0.5640617609024048, "rewards/accuracies": 1.0, "rewards/chosen": -0.20900702476501465, "rewards/margins": 0.33918532729148865, "rewards/rejected": -0.5481923818588257, "step": 5672 }, { "epoch": 15.53182751540041, "grad_norm": 4.252236366271973, "learning_rate": 2.228767123287671e-07, "log_odds_chosen": 2.8793327808380127, "log_odds_ratio": -0.20710636675357819, "logits/chosen": 1.0211001634597778, "logits/rejected": 1.0246813297271729, "logps/chosen": -2.240516424179077, "logps/rejected": -4.961112976074219, "loss": 0.6287, "nll_loss": 0.6079715490341187, "rewards/accuracies": 1.0, "rewards/chosen": -0.22405165433883667, "rewards/margins": 0.2720596492290497, "rewards/rejected": -0.49611133337020874, "step": 5673 }, { "epoch": 15.534565366187543, "grad_norm": 4.0235209465026855, "learning_rate": 2.2273972602739723e-07, "log_odds_chosen": 2.6235907077789307, "log_odds_ratio": -0.2590171694755554, "logits/chosen": 1.1632606983184814, "logits/rejected": 1.1670150756835938, "logps/chosen": -1.8242607116699219, "logps/rejected": -4.336549758911133, "loss": 0.5881, "nll_loss": 0.5621581077575684, "rewards/accuracies": 0.875, "rewards/chosen": -0.1824260652065277, "rewards/margins": 0.251228928565979, "rewards/rejected": -0.4336549937725067, "step": 5674 }, { "epoch": 15.537303216974674, "grad_norm": 3.65816330909729, "learning_rate": 2.2260273972602739e-07, "log_odds_chosen": 2.0401253700256348, "log_odds_ratio": -0.2676500976085663, "logits/chosen": 0.8038181066513062, "logits/rejected": 0.7322250604629517, "logps/chosen": -1.3779206275939941, "logps/rejected": -3.117168426513672, "loss": 0.5665, "nll_loss": 0.539754331111908, "rewards/accuracies": 1.0, "rewards/chosen": -0.13779208064079285, "rewards/margins": 0.1739247739315033, "rewards/rejected": -0.31171685457229614, "step": 5675 }, { "epoch": 15.540041067761807, "grad_norm": 3.365866184234619, "learning_rate": 2.2246575342465754e-07, "log_odds_chosen": 3.8803513050079346, "log_odds_ratio": -0.11017540842294693, "logits/chosen": 1.047965168952942, "logits/rejected": 1.0037996768951416, "logps/chosen": -2.0177881717681885, "logps/rejected": -5.756009101867676, "loss": 0.5723, "nll_loss": 0.5613006949424744, "rewards/accuracies": 1.0, "rewards/chosen": -0.20177879929542542, "rewards/margins": 0.37382209300994873, "rewards/rejected": -0.5756008625030518, "step": 5676 }, { "epoch": 15.542778918548938, "grad_norm": 3.956806182861328, "learning_rate": 2.2232876712328766e-07, "log_odds_chosen": 4.024762153625488, "log_odds_ratio": -0.1303308606147766, "logits/chosen": 1.286869764328003, "logits/rejected": 1.3286429643630981, "logps/chosen": -2.2048144340515137, "logps/rejected": -6.091414451599121, "loss": 0.4941, "nll_loss": 0.48111099004745483, "rewards/accuracies": 1.0, "rewards/chosen": -0.22048142552375793, "rewards/margins": 0.3886599838733673, "rewards/rejected": -0.6091413497924805, "step": 5677 }, { "epoch": 15.545516769336071, "grad_norm": 4.967604637145996, "learning_rate": 2.2219178082191782e-07, "log_odds_chosen": 0.9716890454292297, "log_odds_ratio": -0.4077700972557068, "logits/chosen": 1.070610761642456, "logits/rejected": 1.0853204727172852, "logps/chosen": -2.440925359725952, "logps/rejected": -3.3384952545166016, "loss": 0.517, "nll_loss": 0.47620469331741333, "rewards/accuracies": 0.875, "rewards/chosen": -0.24409253895282745, "rewards/margins": 0.08975698798894882, "rewards/rejected": -0.33384954929351807, "step": 5678 }, { "epoch": 15.548254620123203, "grad_norm": 4.394670009613037, "learning_rate": 2.2205479452054792e-07, "log_odds_chosen": 1.9551838636398315, "log_odds_ratio": -0.2339298576116562, "logits/chosen": 1.1488145589828491, "logits/rejected": 1.0289945602416992, "logps/chosen": -1.6319526433944702, "logps/rejected": -3.3844447135925293, "loss": 0.5146, "nll_loss": 0.49118486046791077, "rewards/accuracies": 1.0, "rewards/chosen": -0.16319526731967926, "rewards/margins": 0.17524921894073486, "rewards/rejected": -0.3384445011615753, "step": 5679 }, { "epoch": 15.550992470910336, "grad_norm": 7.466022491455078, "learning_rate": 2.2191780821917807e-07, "log_odds_chosen": 2.142040491104126, "log_odds_ratio": -0.28286802768707275, "logits/chosen": 1.2801313400268555, "logits/rejected": 1.2494128942489624, "logps/chosen": -2.0251364707946777, "logps/rejected": -4.024282455444336, "loss": 0.6011, "nll_loss": 0.5728579759597778, "rewards/accuracies": 1.0, "rewards/chosen": -0.2025136649608612, "rewards/margins": 0.1999145746231079, "rewards/rejected": -0.4024282395839691, "step": 5680 }, { "epoch": 15.553730321697467, "grad_norm": 3.3359646797180176, "learning_rate": 2.217808219178082e-07, "log_odds_chosen": 2.2491815090179443, "log_odds_ratio": -0.137807697057724, "logits/chosen": 0.9621849060058594, "logits/rejected": 0.8452651500701904, "logps/chosen": -1.5433650016784668, "logps/rejected": -3.5211405754089355, "loss": 0.5531, "nll_loss": 0.5392837524414062, "rewards/accuracies": 1.0, "rewards/chosen": -0.15433651208877563, "rewards/margins": 0.19777753949165344, "rewards/rejected": -0.3521140515804291, "step": 5681 }, { "epoch": 15.5564681724846, "grad_norm": 3.83707857131958, "learning_rate": 2.2164383561643835e-07, "log_odds_chosen": 1.7352674007415771, "log_odds_ratio": -0.2574111223220825, "logits/chosen": 1.4661815166473389, "logits/rejected": 1.4195847511291504, "logps/chosen": -1.3996844291687012, "logps/rejected": -2.895026445388794, "loss": 0.4822, "nll_loss": 0.45643699169158936, "rewards/accuracies": 1.0, "rewards/chosen": -0.13996843993663788, "rewards/margins": 0.1495341956615448, "rewards/rejected": -0.28950265049934387, "step": 5682 }, { "epoch": 15.55920602327173, "grad_norm": 3.7799148559570312, "learning_rate": 2.215068493150685e-07, "log_odds_chosen": 1.5676374435424805, "log_odds_ratio": -0.2695995271205902, "logits/chosen": 1.0104401111602783, "logits/rejected": 0.9493076801300049, "logps/chosen": -1.7256464958190918, "logps/rejected": -3.1395504474639893, "loss": 0.4511, "nll_loss": 0.4241180419921875, "rewards/accuracies": 1.0, "rewards/chosen": -0.17256464064121246, "rewards/margins": 0.14139039814472198, "rewards/rejected": -0.31395503878593445, "step": 5683 }, { "epoch": 15.561943874058864, "grad_norm": 3.4364655017852783, "learning_rate": 2.2136986301369862e-07, "log_odds_chosen": 3.1080093383789062, "log_odds_ratio": -0.2354409098625183, "logits/chosen": 1.0921049118041992, "logits/rejected": 1.02092707157135, "logps/chosen": -1.693594217300415, "logps/rejected": -4.607257843017578, "loss": 0.565, "nll_loss": 0.541490375995636, "rewards/accuracies": 1.0, "rewards/chosen": -0.16935943067073822, "rewards/margins": 0.2913663685321808, "rewards/rejected": -0.4607257843017578, "step": 5684 }, { "epoch": 15.564681724845997, "grad_norm": 3.0844106674194336, "learning_rate": 2.2123287671232877e-07, "log_odds_chosen": 2.6328845024108887, "log_odds_ratio": -0.11946742236614227, "logits/chosen": 0.8833773136138916, "logits/rejected": 0.8941302299499512, "logps/chosen": -1.5123486518859863, "logps/rejected": -3.8973782062530518, "loss": 0.4832, "nll_loss": 0.47129783034324646, "rewards/accuracies": 1.0, "rewards/chosen": -0.15123486518859863, "rewards/margins": 0.23850296437740326, "rewards/rejected": -0.3897378444671631, "step": 5685 }, { "epoch": 15.567419575633128, "grad_norm": 8.398863792419434, "learning_rate": 2.2109589041095887e-07, "log_odds_chosen": 0.7651057839393616, "log_odds_ratio": -0.6096057295799255, "logits/chosen": 1.1329710483551025, "logits/rejected": 1.1888073682785034, "logps/chosen": -2.882443904876709, "logps/rejected": -3.5610525608062744, "loss": 0.6498, "nll_loss": 0.5888601541519165, "rewards/accuracies": 0.875, "rewards/chosen": -0.288244366645813, "rewards/margins": 0.06786088645458221, "rewards/rejected": -0.3561052680015564, "step": 5686 }, { "epoch": 15.570157426420261, "grad_norm": 5.866726398468018, "learning_rate": 2.2095890410958903e-07, "log_odds_chosen": 4.6361260414123535, "log_odds_ratio": -0.31761419773101807, "logits/chosen": 1.2888529300689697, "logits/rejected": 1.3496028184890747, "logps/chosen": -1.8098136186599731, "logps/rejected": -6.165022850036621, "loss": 0.7143, "nll_loss": 0.6825310587882996, "rewards/accuracies": 0.875, "rewards/chosen": -0.1809813529253006, "rewards/margins": 0.4355209469795227, "rewards/rejected": -0.6165022850036621, "step": 5687 }, { "epoch": 15.572895277207392, "grad_norm": 4.463405132293701, "learning_rate": 2.2082191780821915e-07, "log_odds_chosen": 1.6498280763626099, "log_odds_ratio": -0.4758467674255371, "logits/chosen": 1.1531109809875488, "logits/rejected": 1.150123119354248, "logps/chosen": -2.4054253101348877, "logps/rejected": -4.022525310516357, "loss": 0.572, "nll_loss": 0.5243899822235107, "rewards/accuracies": 0.625, "rewards/chosen": -0.24054253101348877, "rewards/margins": 0.1617100089788437, "rewards/rejected": -0.40225252509117126, "step": 5688 }, { "epoch": 15.575633127994525, "grad_norm": 4.031620979309082, "learning_rate": 2.206849315068493e-07, "log_odds_chosen": 2.398742198944092, "log_odds_ratio": -0.31974470615386963, "logits/chosen": 1.1424006223678589, "logits/rejected": 1.083682894706726, "logps/chosen": -1.8790867328643799, "logps/rejected": -4.184144973754883, "loss": 0.574, "nll_loss": 0.5419816970825195, "rewards/accuracies": 0.875, "rewards/chosen": -0.18790869414806366, "rewards/margins": 0.2305057942867279, "rewards/rejected": -0.41841450333595276, "step": 5689 }, { "epoch": 15.578370978781656, "grad_norm": 4.158841609954834, "learning_rate": 2.2054794520547946e-07, "log_odds_chosen": 3.355799674987793, "log_odds_ratio": -0.15824627876281738, "logits/chosen": 1.06321120262146, "logits/rejected": 1.1299540996551514, "logps/chosen": -2.5386533737182617, "logps/rejected": -5.783481597900391, "loss": 0.6472, "nll_loss": 0.6313389539718628, "rewards/accuracies": 1.0, "rewards/chosen": -0.2538653612136841, "rewards/margins": 0.32448285818099976, "rewards/rejected": -0.5783482193946838, "step": 5690 }, { "epoch": 15.58110882956879, "grad_norm": 4.238918304443359, "learning_rate": 2.2041095890410958e-07, "log_odds_chosen": 1.4858407974243164, "log_odds_ratio": -0.30408167839050293, "logits/chosen": 0.7846169471740723, "logits/rejected": 0.6496435403823853, "logps/chosen": -1.8144683837890625, "logps/rejected": -3.125351905822754, "loss": 0.4764, "nll_loss": 0.44597378373146057, "rewards/accuracies": 1.0, "rewards/chosen": -0.18144682049751282, "rewards/margins": 0.13108836114406586, "rewards/rejected": -0.3125351667404175, "step": 5691 }, { "epoch": 15.58384668035592, "grad_norm": 3.2775704860687256, "learning_rate": 2.2027397260273973e-07, "log_odds_chosen": 4.305632591247559, "log_odds_ratio": -0.06065824627876282, "logits/chosen": 1.3077983856201172, "logits/rejected": 1.3228697776794434, "logps/chosen": -2.1719818115234375, "logps/rejected": -6.347622871398926, "loss": 0.5407, "nll_loss": 0.5345910787582397, "rewards/accuracies": 1.0, "rewards/chosen": -0.2171981930732727, "rewards/margins": 0.41756415367126465, "rewards/rejected": -0.6347622871398926, "step": 5692 }, { "epoch": 15.586584531143053, "grad_norm": 3.8349006175994873, "learning_rate": 2.2013698630136983e-07, "log_odds_chosen": 6.916288375854492, "log_odds_ratio": -0.07736536115407944, "logits/chosen": 1.0666340589523315, "logits/rejected": 1.0889317989349365, "logps/chosen": -2.4801995754241943, "logps/rejected": -9.245818138122559, "loss": 0.6815, "nll_loss": 0.6737221479415894, "rewards/accuracies": 1.0, "rewards/chosen": -0.2480199635028839, "rewards/margins": 0.6765618920326233, "rewards/rejected": -0.9245818853378296, "step": 5693 }, { "epoch": 15.589322381930184, "grad_norm": 4.333177089691162, "learning_rate": 2.1999999999999998e-07, "log_odds_chosen": 3.787226438522339, "log_odds_ratio": -0.2122204750776291, "logits/chosen": 0.9485622644424438, "logits/rejected": 1.0238816738128662, "logps/chosen": -2.4388108253479004, "logps/rejected": -6.149160861968994, "loss": 0.7411, "nll_loss": 0.7198548913002014, "rewards/accuracies": 0.875, "rewards/chosen": -0.24388110637664795, "rewards/margins": 0.37103497982025146, "rewards/rejected": -0.6149160861968994, "step": 5694 }, { "epoch": 15.592060232717317, "grad_norm": 3.491654396057129, "learning_rate": 2.1986301369863014e-07, "log_odds_chosen": 1.8702025413513184, "log_odds_ratio": -0.27285507321357727, "logits/chosen": 0.9880926012992859, "logits/rejected": 1.0009649991989136, "logps/chosen": -1.6472755670547485, "logps/rejected": -3.36427640914917, "loss": 0.4375, "nll_loss": 0.4101773798465729, "rewards/accuracies": 1.0, "rewards/chosen": -0.1647275686264038, "rewards/margins": 0.17170007526874542, "rewards/rejected": -0.33642762899398804, "step": 5695 }, { "epoch": 15.594798083504449, "grad_norm": 4.456760406494141, "learning_rate": 2.1972602739726026e-07, "log_odds_chosen": 1.8632299900054932, "log_odds_ratio": -0.35190349817276, "logits/chosen": 1.1945379972457886, "logits/rejected": 1.1880589723587036, "logps/chosen": -2.2696619033813477, "logps/rejected": -3.9774692058563232, "loss": 0.7006, "nll_loss": 0.665398359298706, "rewards/accuracies": 0.875, "rewards/chosen": -0.22696618735790253, "rewards/margins": 0.1707807332277298, "rewards/rejected": -0.3977469205856323, "step": 5696 }, { "epoch": 15.597535934291582, "grad_norm": 3.8461720943450928, "learning_rate": 2.1958904109589041e-07, "log_odds_chosen": 2.036935329437256, "log_odds_ratio": -0.284135103225708, "logits/chosen": 1.4518307447433472, "logits/rejected": 1.4404970407485962, "logps/chosen": -1.7608058452606201, "logps/rejected": -3.6379570960998535, "loss": 0.5325, "nll_loss": 0.5040621161460876, "rewards/accuracies": 0.875, "rewards/chosen": -0.176080584526062, "rewards/margins": 0.18771511316299438, "rewards/rejected": -0.3637956976890564, "step": 5697 }, { "epoch": 15.600273785078713, "grad_norm": 3.596717357635498, "learning_rate": 2.1945205479452054e-07, "log_odds_chosen": 2.8265879154205322, "log_odds_ratio": -0.21195559203624725, "logits/chosen": 1.156097650527954, "logits/rejected": 1.0888614654541016, "logps/chosen": -1.7573919296264648, "logps/rejected": -4.432706832885742, "loss": 0.5035, "nll_loss": 0.48225486278533936, "rewards/accuracies": 0.875, "rewards/chosen": -0.17573919892311096, "rewards/margins": 0.26753148436546326, "rewards/rejected": -0.4432706832885742, "step": 5698 }, { "epoch": 15.603011635865846, "grad_norm": 3.3708622455596924, "learning_rate": 2.1931506849315067e-07, "log_odds_chosen": 3.2187418937683105, "log_odds_ratio": -0.197653666138649, "logits/chosen": 1.1741917133331299, "logits/rejected": 1.1583812236785889, "logps/chosen": -1.8457741737365723, "logps/rejected": -4.916658878326416, "loss": 0.5954, "nll_loss": 0.5756770372390747, "rewards/accuracies": 0.875, "rewards/chosen": -0.18457743525505066, "rewards/margins": 0.3070884644985199, "rewards/rejected": -0.49166589975357056, "step": 5699 }, { "epoch": 15.605749486652977, "grad_norm": 4.848728656768799, "learning_rate": 2.191780821917808e-07, "log_odds_chosen": 3.5816025733947754, "log_odds_ratio": -0.2421056479215622, "logits/chosen": 1.1767452955245972, "logits/rejected": 1.2735216617584229, "logps/chosen": -3.035938262939453, "logps/rejected": -6.490230083465576, "loss": 0.6101, "nll_loss": 0.5858405828475952, "rewards/accuracies": 0.875, "rewards/chosen": -0.30359384417533875, "rewards/margins": 0.3454291820526123, "rewards/rejected": -0.6490230560302734, "step": 5700 }, { "epoch": 15.60848733744011, "grad_norm": 6.600281238555908, "learning_rate": 2.1904109589041094e-07, "log_odds_chosen": 0.7886801958084106, "log_odds_ratio": -0.6028597354888916, "logits/chosen": 0.8335245847702026, "logits/rejected": 0.8464794158935547, "logps/chosen": -2.084851026535034, "logps/rejected": -2.6992075443267822, "loss": 0.5583, "nll_loss": 0.4980614483356476, "rewards/accuracies": 0.75, "rewards/chosen": -0.20848511159420013, "rewards/margins": 0.06143565848469734, "rewards/rejected": -0.2699207663536072, "step": 5701 }, { "epoch": 15.611225188227241, "grad_norm": 3.275540351867676, "learning_rate": 2.189041095890411e-07, "log_odds_chosen": 3.3401753902435303, "log_odds_ratio": -0.07615210115909576, "logits/chosen": 0.9121576547622681, "logits/rejected": 0.9005059003829956, "logps/chosen": -1.394934892654419, "logps/rejected": -4.432700157165527, "loss": 0.485, "nll_loss": 0.4774033725261688, "rewards/accuracies": 1.0, "rewards/chosen": -0.13949349522590637, "rewards/margins": 0.3037765324115753, "rewards/rejected": -0.4432699978351593, "step": 5702 }, { "epoch": 15.613963039014374, "grad_norm": 7.770473480224609, "learning_rate": 2.1876712328767122e-07, "log_odds_chosen": 0.7956463098526001, "log_odds_ratio": -0.5249998569488525, "logits/chosen": 1.3328094482421875, "logits/rejected": 1.2184662818908691, "logps/chosen": -2.261230707168579, "logps/rejected": -2.9655396938323975, "loss": 0.6425, "nll_loss": 0.5900116562843323, "rewards/accuracies": 0.75, "rewards/chosen": -0.22612306475639343, "rewards/margins": 0.07043091952800751, "rewards/rejected": -0.29655396938323975, "step": 5703 }, { "epoch": 15.616700889801505, "grad_norm": 4.989352703094482, "learning_rate": 2.1863013698630137e-07, "log_odds_chosen": 1.7011213302612305, "log_odds_ratio": -0.3057912588119507, "logits/chosen": 1.0871033668518066, "logits/rejected": 1.1327617168426514, "logps/chosen": -3.1301169395446777, "logps/rejected": -4.738103866577148, "loss": 0.6174, "nll_loss": 0.5868105888366699, "rewards/accuracies": 0.875, "rewards/chosen": -0.31301170587539673, "rewards/margins": 0.16079869866371155, "rewards/rejected": -0.4738103747367859, "step": 5704 }, { "epoch": 15.619438740588638, "grad_norm": 3.7740371227264404, "learning_rate": 2.184931506849315e-07, "log_odds_chosen": 2.5266313552856445, "log_odds_ratio": -0.2969697117805481, "logits/chosen": 1.0765182971954346, "logits/rejected": 1.0842266082763672, "logps/chosen": -2.036468982696533, "logps/rejected": -4.305024147033691, "loss": 0.4647, "nll_loss": 0.43499571084976196, "rewards/accuracies": 0.875, "rewards/chosen": -0.20364689826965332, "rewards/margins": 0.22685550153255463, "rewards/rejected": -0.43050241470336914, "step": 5705 }, { "epoch": 15.62217659137577, "grad_norm": 3.6216530799865723, "learning_rate": 2.1835616438356162e-07, "log_odds_chosen": 4.130041599273682, "log_odds_ratio": -0.23124746978282928, "logits/chosen": 1.247308373451233, "logits/rejected": 1.237291693687439, "logps/chosen": -1.5847855806350708, "logps/rejected": -5.55352783203125, "loss": 0.5105, "nll_loss": 0.48735612630844116, "rewards/accuracies": 0.875, "rewards/chosen": -0.15847855806350708, "rewards/margins": 0.39687421917915344, "rewards/rejected": -0.5553528070449829, "step": 5706 }, { "epoch": 15.624914442162902, "grad_norm": 4.022686004638672, "learning_rate": 2.1821917808219175e-07, "log_odds_chosen": 2.3889763355255127, "log_odds_ratio": -0.16710802912712097, "logits/chosen": 1.067428469657898, "logits/rejected": 1.0082213878631592, "logps/chosen": -1.5156869888305664, "logps/rejected": -3.680420398712158, "loss": 0.4575, "nll_loss": 0.44078147411346436, "rewards/accuracies": 1.0, "rewards/chosen": -0.1515686959028244, "rewards/margins": 0.21647332608699799, "rewards/rejected": -0.3680420517921448, "step": 5707 }, { "epoch": 15.627652292950033, "grad_norm": 3.961550712585449, "learning_rate": 2.180821917808219e-07, "log_odds_chosen": 4.0359721183776855, "log_odds_ratio": -0.0653032660484314, "logits/chosen": 1.1302694082260132, "logits/rejected": 1.1040359735488892, "logps/chosen": -1.6191649436950684, "logps/rejected": -5.3371901512146, "loss": 0.437, "nll_loss": 0.43044722080230713, "rewards/accuracies": 1.0, "rewards/chosen": -0.16191649436950684, "rewards/margins": 0.37180256843566895, "rewards/rejected": -0.5337190628051758, "step": 5708 }, { "epoch": 15.630390143737166, "grad_norm": 4.02998161315918, "learning_rate": 2.1794520547945205e-07, "log_odds_chosen": 1.9153480529785156, "log_odds_ratio": -0.26841264963150024, "logits/chosen": 1.2015513181686401, "logits/rejected": 1.174156665802002, "logps/chosen": -2.1210484504699707, "logps/rejected": -3.912006378173828, "loss": 0.5865, "nll_loss": 0.559686005115509, "rewards/accuracies": 1.0, "rewards/chosen": -0.21210485696792603, "rewards/margins": 0.1790957897901535, "rewards/rejected": -0.3912006616592407, "step": 5709 }, { "epoch": 15.633127994524298, "grad_norm": 4.153909683227539, "learning_rate": 2.1780821917808218e-07, "log_odds_chosen": 2.9498822689056396, "log_odds_ratio": -0.17219671607017517, "logits/chosen": 0.9851207733154297, "logits/rejected": 0.9128116369247437, "logps/chosen": -1.5081956386566162, "logps/rejected": -4.220945358276367, "loss": 0.5322, "nll_loss": 0.5149789452552795, "rewards/accuracies": 1.0, "rewards/chosen": -0.1508195698261261, "rewards/margins": 0.27127498388290405, "rewards/rejected": -0.42209452390670776, "step": 5710 }, { "epoch": 15.63586584531143, "grad_norm": 3.2345826625823975, "learning_rate": 2.1767123287671233e-07, "log_odds_chosen": 3.043659210205078, "log_odds_ratio": -0.15508928894996643, "logits/chosen": 1.1098493337631226, "logits/rejected": 1.0776426792144775, "logps/chosen": -1.8029465675354004, "logps/rejected": -4.678585052490234, "loss": 0.4802, "nll_loss": 0.46464109420776367, "rewards/accuracies": 1.0, "rewards/chosen": -0.18029466271400452, "rewards/margins": 0.28756383061408997, "rewards/rejected": -0.4678584933280945, "step": 5711 }, { "epoch": 15.638603696098563, "grad_norm": 3.626452922821045, "learning_rate": 2.1753424657534246e-07, "log_odds_chosen": 4.225581169128418, "log_odds_ratio": -0.1113027036190033, "logits/chosen": 0.837746798992157, "logits/rejected": 0.8706974983215332, "logps/chosen": -2.271749973297119, "logps/rejected": -6.264217376708984, "loss": 0.5915, "nll_loss": 0.5803753137588501, "rewards/accuracies": 1.0, "rewards/chosen": -0.22717499732971191, "rewards/margins": 0.3992467224597931, "rewards/rejected": -0.6264217495918274, "step": 5712 }, { "epoch": 15.641341546885695, "grad_norm": 3.560744047164917, "learning_rate": 2.1739726027397258e-07, "log_odds_chosen": 4.1936469078063965, "log_odds_ratio": -0.09868321567773819, "logits/chosen": 1.2133725881576538, "logits/rejected": 1.225083589553833, "logps/chosen": -1.567310094833374, "logps/rejected": -5.462820053100586, "loss": 0.5131, "nll_loss": 0.5032222867012024, "rewards/accuracies": 1.0, "rewards/chosen": -0.1567310094833374, "rewards/margins": 0.38955095410346985, "rewards/rejected": -0.5462819337844849, "step": 5713 }, { "epoch": 15.644079397672828, "grad_norm": 5.669932842254639, "learning_rate": 2.1726027397260274e-07, "log_odds_chosen": 2.8151473999023438, "log_odds_ratio": -0.42067548632621765, "logits/chosen": 1.3297467231750488, "logits/rejected": 1.2682610750198364, "logps/chosen": -1.7340730428695679, "logps/rejected": -4.301582336425781, "loss": 0.4729, "nll_loss": 0.43079856038093567, "rewards/accuracies": 0.875, "rewards/chosen": -0.17340730130672455, "rewards/margins": 0.2567509710788727, "rewards/rejected": -0.4301583170890808, "step": 5714 }, { "epoch": 15.646817248459959, "grad_norm": 3.564410924911499, "learning_rate": 2.1712328767123286e-07, "log_odds_chosen": 1.7857458591461182, "log_odds_ratio": -0.19646450877189636, "logits/chosen": 1.0435726642608643, "logits/rejected": 1.0849697589874268, "logps/chosen": -2.111182451248169, "logps/rejected": -3.7599966526031494, "loss": 0.5299, "nll_loss": 0.5102885365486145, "rewards/accuracies": 1.0, "rewards/chosen": -0.21111825108528137, "rewards/margins": 0.16488143801689148, "rewards/rejected": -0.37599968910217285, "step": 5715 }, { "epoch": 15.649555099247092, "grad_norm": 7.465213775634766, "learning_rate": 2.16986301369863e-07, "log_odds_chosen": 4.095180988311768, "log_odds_ratio": -0.250244140625, "logits/chosen": 1.2786825895309448, "logits/rejected": 1.265372633934021, "logps/chosen": -1.9418691396713257, "logps/rejected": -5.707549571990967, "loss": 0.5651, "nll_loss": 0.5400400161743164, "rewards/accuracies": 0.875, "rewards/chosen": -0.19418692588806152, "rewards/margins": 0.3765679895877838, "rewards/rejected": -0.5707549452781677, "step": 5716 }, { "epoch": 15.652292950034223, "grad_norm": 4.697350978851318, "learning_rate": 2.1684931506849314e-07, "log_odds_chosen": 1.7673944234848022, "log_odds_ratio": -0.2598772346973419, "logits/chosen": 0.860683023929596, "logits/rejected": 0.8514699339866638, "logps/chosen": -2.054069995880127, "logps/rejected": -3.6332931518554688, "loss": 0.6239, "nll_loss": 0.5979613065719604, "rewards/accuracies": 0.875, "rewards/chosen": -0.2054070234298706, "rewards/margins": 0.15792232751846313, "rewards/rejected": -0.36332935094833374, "step": 5717 }, { "epoch": 15.655030800821356, "grad_norm": 10.115463256835938, "learning_rate": 2.167123287671233e-07, "log_odds_chosen": 2.929928779602051, "log_odds_ratio": -0.20290838181972504, "logits/chosen": 1.0419143438339233, "logits/rejected": 1.0079171657562256, "logps/chosen": -1.8539578914642334, "logps/rejected": -4.570044040679932, "loss": 0.5754, "nll_loss": 0.5550853610038757, "rewards/accuracies": 1.0, "rewards/chosen": -0.18539577722549438, "rewards/margins": 0.2716086506843567, "rewards/rejected": -0.4570044279098511, "step": 5718 }, { "epoch": 15.657768651608487, "grad_norm": 3.5093531608581543, "learning_rate": 2.165753424657534e-07, "log_odds_chosen": 4.000848293304443, "log_odds_ratio": -0.14637717604637146, "logits/chosen": 0.9738441705703735, "logits/rejected": 0.9119369983673096, "logps/chosen": -2.0211288928985596, "logps/rejected": -5.848336696624756, "loss": 0.614, "nll_loss": 0.5993173122406006, "rewards/accuracies": 1.0, "rewards/chosen": -0.20211288332939148, "rewards/margins": 0.38272082805633545, "rewards/rejected": -0.5848336815834045, "step": 5719 }, { "epoch": 15.66050650239562, "grad_norm": 4.862808704376221, "learning_rate": 2.1643835616438354e-07, "log_odds_chosen": 2.252419948577881, "log_odds_ratio": -0.24767664074897766, "logits/chosen": 1.3812124729156494, "logits/rejected": 1.364527940750122, "logps/chosen": -1.9429901838302612, "logps/rejected": -4.08144998550415, "loss": 0.5891, "nll_loss": 0.5643315315246582, "rewards/accuracies": 0.875, "rewards/chosen": -0.19429902732372284, "rewards/margins": 0.21384599804878235, "rewards/rejected": -0.40814507007598877, "step": 5720 }, { "epoch": 15.663244353182751, "grad_norm": 3.5336413383483887, "learning_rate": 2.163013698630137e-07, "log_odds_chosen": 1.847076177597046, "log_odds_ratio": -0.3182334005832672, "logits/chosen": 0.9438940286636353, "logits/rejected": 0.9440453052520752, "logps/chosen": -1.3669153451919556, "logps/rejected": -3.0140061378479004, "loss": 0.4909, "nll_loss": 0.45908114314079285, "rewards/accuracies": 0.875, "rewards/chosen": -0.13669155538082123, "rewards/margins": 0.16470906138420105, "rewards/rejected": -0.3014006018638611, "step": 5721 }, { "epoch": 15.665982203969884, "grad_norm": 3.7333896160125732, "learning_rate": 2.1616438356164382e-07, "log_odds_chosen": 3.6340713500976562, "log_odds_ratio": -0.11913962662220001, "logits/chosen": 1.1290984153747559, "logits/rejected": 1.0719209909439087, "logps/chosen": -1.7150845527648926, "logps/rejected": -5.113245964050293, "loss": 0.5113, "nll_loss": 0.4993972182273865, "rewards/accuracies": 1.0, "rewards/chosen": -0.17150846123695374, "rewards/margins": 0.33981606364250183, "rewards/rejected": -0.5113245248794556, "step": 5722 }, { "epoch": 15.668720054757015, "grad_norm": 4.196352958679199, "learning_rate": 2.1602739726027397e-07, "log_odds_chosen": 1.5458306074142456, "log_odds_ratio": -0.4483100175857544, "logits/chosen": 0.7807237505912781, "logits/rejected": 0.7424634695053101, "logps/chosen": -2.650657892227173, "logps/rejected": -3.988325595855713, "loss": 0.528, "nll_loss": 0.4831423759460449, "rewards/accuracies": 0.75, "rewards/chosen": -0.2650657892227173, "rewards/margins": 0.1337667852640152, "rewards/rejected": -0.3988325893878937, "step": 5723 }, { "epoch": 15.671457905544148, "grad_norm": 3.539271831512451, "learning_rate": 2.158904109589041e-07, "log_odds_chosen": 0.8404585719108582, "log_odds_ratio": -0.44391143321990967, "logits/chosen": 1.0470529794692993, "logits/rejected": 0.9647918939590454, "logps/chosen": -1.7950482368469238, "logps/rejected": -2.524573802947998, "loss": 0.58, "nll_loss": 0.5355854630470276, "rewards/accuracies": 1.0, "rewards/chosen": -0.17950482666492462, "rewards/margins": 0.07295256108045578, "rewards/rejected": -0.2524573802947998, "step": 5724 }, { "epoch": 15.67419575633128, "grad_norm": 4.081814765930176, "learning_rate": 2.1575342465753425e-07, "log_odds_chosen": 1.5906877517700195, "log_odds_ratio": -0.30177587270736694, "logits/chosen": 0.9539209008216858, "logits/rejected": 0.9453094005584717, "logps/chosen": -1.9765636920928955, "logps/rejected": -3.405080795288086, "loss": 0.4585, "nll_loss": 0.4282898008823395, "rewards/accuracies": 0.875, "rewards/chosen": -0.19765637814998627, "rewards/margins": 0.14285171031951904, "rewards/rejected": -0.3405080735683441, "step": 5725 }, { "epoch": 15.676933607118412, "grad_norm": 3.9318459033966064, "learning_rate": 2.1561643835616437e-07, "log_odds_chosen": 2.1482653617858887, "log_odds_ratio": -0.20014411211013794, "logits/chosen": 0.9305738210678101, "logits/rejected": 0.9981040954589844, "logps/chosen": -2.0730462074279785, "logps/rejected": -4.097761154174805, "loss": 0.5139, "nll_loss": 0.493863046169281, "rewards/accuracies": 1.0, "rewards/chosen": -0.20730459690093994, "rewards/margins": 0.2024715095758438, "rewards/rejected": -0.40977615118026733, "step": 5726 }, { "epoch": 15.679671457905544, "grad_norm": 3.72312331199646, "learning_rate": 2.154794520547945e-07, "log_odds_chosen": 3.0563201904296875, "log_odds_ratio": -0.1883615106344223, "logits/chosen": 1.2047173976898193, "logits/rejected": 1.1107264757156372, "logps/chosen": -1.361844539642334, "logps/rejected": -4.142961025238037, "loss": 0.4139, "nll_loss": 0.395040363073349, "rewards/accuracies": 1.0, "rewards/chosen": -0.1361844539642334, "rewards/margins": 0.27811166644096375, "rewards/rejected": -0.41429609060287476, "step": 5727 }, { "epoch": 15.682409308692677, "grad_norm": 5.223007678985596, "learning_rate": 2.1534246575342465e-07, "log_odds_chosen": 2.9142444133758545, "log_odds_ratio": -0.16264265775680542, "logits/chosen": 1.0909004211425781, "logits/rejected": 0.9035606384277344, "logps/chosen": -2.284167766571045, "logps/rejected": -5.02864408493042, "loss": 0.5936, "nll_loss": 0.5773571729660034, "rewards/accuracies": 0.875, "rewards/chosen": -0.22841675579547882, "rewards/margins": 0.27444761991500854, "rewards/rejected": -0.502864420413971, "step": 5728 }, { "epoch": 15.685147159479808, "grad_norm": 6.520198345184326, "learning_rate": 2.1520547945205478e-07, "log_odds_chosen": 1.8705084323883057, "log_odds_ratio": -0.7804338932037354, "logits/chosen": 1.1713402271270752, "logits/rejected": 1.241263747215271, "logps/chosen": -3.5369036197662354, "logps/rejected": -5.369550704956055, "loss": 0.6717, "nll_loss": 0.5936332941055298, "rewards/accuracies": 0.75, "rewards/chosen": -0.35369041562080383, "rewards/margins": 0.18326470255851746, "rewards/rejected": -0.5369550585746765, "step": 5729 }, { "epoch": 15.68788501026694, "grad_norm": 3.4286227226257324, "learning_rate": 2.1506849315068493e-07, "log_odds_chosen": 3.261970281600952, "log_odds_ratio": -0.18016724288463593, "logits/chosen": 0.9584537744522095, "logits/rejected": 0.8706687688827515, "logps/chosen": -1.4293906688690186, "logps/rejected": -4.457974433898926, "loss": 0.4273, "nll_loss": 0.4092893600463867, "rewards/accuracies": 1.0, "rewards/chosen": -0.14293906092643738, "rewards/margins": 0.3028584122657776, "rewards/rejected": -0.44579747319221497, "step": 5730 }, { "epoch": 15.690622861054072, "grad_norm": 5.931291103363037, "learning_rate": 2.1493150684931506e-07, "log_odds_chosen": 2.0293726921081543, "log_odds_ratio": -0.41694116592407227, "logits/chosen": 1.115039348602295, "logits/rejected": 1.0159804821014404, "logps/chosen": -2.2436044216156006, "logps/rejected": -4.109621524810791, "loss": 0.6706, "nll_loss": 0.6289492249488831, "rewards/accuracies": 0.875, "rewards/chosen": -0.22436043620109558, "rewards/margins": 0.18660171329975128, "rewards/rejected": -0.41096216440200806, "step": 5731 }, { "epoch": 15.693360711841205, "grad_norm": 7.514641761779785, "learning_rate": 2.147945205479452e-07, "log_odds_chosen": 1.195724368095398, "log_odds_ratio": -0.6651310920715332, "logits/chosen": 0.9326920509338379, "logits/rejected": 0.9256742596626282, "logps/chosen": -3.032978057861328, "logps/rejected": -4.08170747756958, "loss": 0.5182, "nll_loss": 0.4516500234603882, "rewards/accuracies": 0.875, "rewards/chosen": -0.30329781770706177, "rewards/margins": 0.10487294942140579, "rewards/rejected": -0.40817075967788696, "step": 5732 }, { "epoch": 15.696098562628336, "grad_norm": 3.577012300491333, "learning_rate": 2.1465753424657533e-07, "log_odds_chosen": 2.6117334365844727, "log_odds_ratio": -0.18455453217029572, "logits/chosen": 1.1722742319107056, "logits/rejected": 1.2393615245819092, "logps/chosen": -2.2932627201080322, "logps/rejected": -4.8199615478515625, "loss": 0.6286, "nll_loss": 0.6101489067077637, "rewards/accuracies": 1.0, "rewards/chosen": -0.2293262779712677, "rewards/margins": 0.25266990065574646, "rewards/rejected": -0.4819961190223694, "step": 5733 }, { "epoch": 15.698836413415469, "grad_norm": 6.7238335609436035, "learning_rate": 2.1452054794520546e-07, "log_odds_chosen": 2.2350635528564453, "log_odds_ratio": -0.43064671754837036, "logits/chosen": 1.1196961402893066, "logits/rejected": 1.12507164478302, "logps/chosen": -2.885920524597168, "logps/rejected": -5.037815570831299, "loss": 0.6854, "nll_loss": 0.6423608064651489, "rewards/accuracies": 0.75, "rewards/chosen": -0.28859207034111023, "rewards/margins": 0.21518951654434204, "rewards/rejected": -0.5037815570831299, "step": 5734 }, { "epoch": 15.7015742642026, "grad_norm": 3.6614840030670166, "learning_rate": 2.143835616438356e-07, "log_odds_chosen": 2.488645076751709, "log_odds_ratio": -0.1800106316804886, "logits/chosen": 0.9928315281867981, "logits/rejected": 0.9050315618515015, "logps/chosen": -1.4604634046554565, "logps/rejected": -3.710564136505127, "loss": 0.4887, "nll_loss": 0.4707309901714325, "rewards/accuracies": 1.0, "rewards/chosen": -0.14604634046554565, "rewards/margins": 0.22501006722450256, "rewards/rejected": -0.3710564374923706, "step": 5735 }, { "epoch": 15.704312114989733, "grad_norm": 3.8911428451538086, "learning_rate": 2.1424657534246574e-07, "log_odds_chosen": 3.2834901809692383, "log_odds_ratio": -0.19813692569732666, "logits/chosen": 0.7681035995483398, "logits/rejected": 0.7317166924476624, "logps/chosen": -2.042140245437622, "logps/rejected": -5.189326286315918, "loss": 0.5163, "nll_loss": 0.49646490812301636, "rewards/accuracies": 1.0, "rewards/chosen": -0.20421403646469116, "rewards/margins": 0.314718633890152, "rewards/rejected": -0.5189327001571655, "step": 5736 }, { "epoch": 15.707049965776864, "grad_norm": 7.34098482131958, "learning_rate": 2.141095890410959e-07, "log_odds_chosen": 2.2416017055511475, "log_odds_ratio": -0.6532074809074402, "logits/chosen": 1.1514148712158203, "logits/rejected": 1.1850502490997314, "logps/chosen": -2.7279293537139893, "logps/rejected": -4.874250411987305, "loss": 0.7346, "nll_loss": 0.669323742389679, "rewards/accuracies": 0.75, "rewards/chosen": -0.2727929353713989, "rewards/margins": 0.21463216841220856, "rewards/rejected": -0.4874250888824463, "step": 5737 }, { "epoch": 15.709787816563997, "grad_norm": 3.6947784423828125, "learning_rate": 2.1397260273972601e-07, "log_odds_chosen": 2.984996795654297, "log_odds_ratio": -0.2370244264602661, "logits/chosen": 1.2187466621398926, "logits/rejected": 1.1831645965576172, "logps/chosen": -1.6299939155578613, "logps/rejected": -4.4220170974731445, "loss": 0.558, "nll_loss": 0.5343316793441772, "rewards/accuracies": 0.875, "rewards/chosen": -0.16299939155578613, "rewards/margins": 0.27920231223106384, "rewards/rejected": -0.44220170378685, "step": 5738 }, { "epoch": 15.71252566735113, "grad_norm": 3.580918788909912, "learning_rate": 2.1383561643835617e-07, "log_odds_chosen": 2.3346669673919678, "log_odds_ratio": -0.18741944432258606, "logits/chosen": 1.1585290431976318, "logits/rejected": 1.1289105415344238, "logps/chosen": -2.062560558319092, "logps/rejected": -4.254306793212891, "loss": 0.5787, "nll_loss": 0.5599091649055481, "rewards/accuracies": 1.0, "rewards/chosen": -0.20625604689121246, "rewards/margins": 0.21917462348937988, "rewards/rejected": -0.42543065547943115, "step": 5739 }, { "epoch": 15.715263518138261, "grad_norm": 5.674002647399902, "learning_rate": 2.136986301369863e-07, "log_odds_chosen": 2.8570990562438965, "log_odds_ratio": -0.18723070621490479, "logits/chosen": 1.4028736352920532, "logits/rejected": 1.4145722389221191, "logps/chosen": -1.9872227907180786, "logps/rejected": -4.619483470916748, "loss": 0.5539, "nll_loss": 0.535133957862854, "rewards/accuracies": 1.0, "rewards/chosen": -0.19872228801250458, "rewards/margins": 0.26322609186172485, "rewards/rejected": -0.46194836497306824, "step": 5740 }, { "epoch": 15.718001368925394, "grad_norm": 3.847651720046997, "learning_rate": 2.1356164383561642e-07, "log_odds_chosen": 2.8024423122406006, "log_odds_ratio": -0.3590583801269531, "logits/chosen": 1.0077232122421265, "logits/rejected": 1.026716709136963, "logps/chosen": -1.8645787239074707, "logps/rejected": -4.570067405700684, "loss": 0.559, "nll_loss": 0.5230464339256287, "rewards/accuracies": 1.0, "rewards/chosen": -0.18645788729190826, "rewards/margins": 0.27054885029792786, "rewards/rejected": -0.4570067226886749, "step": 5741 }, { "epoch": 15.720739219712526, "grad_norm": 3.2738473415374756, "learning_rate": 2.1342465753424657e-07, "log_odds_chosen": 2.683248996734619, "log_odds_ratio": -0.11488895118236542, "logits/chosen": 1.2448303699493408, "logits/rejected": 1.2178623676300049, "logps/chosen": -1.9542847871780396, "logps/rejected": -4.472655773162842, "loss": 0.5389, "nll_loss": 0.527428925037384, "rewards/accuracies": 1.0, "rewards/chosen": -0.19542847573757172, "rewards/margins": 0.2518370747566223, "rewards/rejected": -0.4472655653953552, "step": 5742 }, { "epoch": 15.723477070499658, "grad_norm": 3.407365322113037, "learning_rate": 2.132876712328767e-07, "log_odds_chosen": 2.9508004188537598, "log_odds_ratio": -0.1746266484260559, "logits/chosen": 1.0654034614562988, "logits/rejected": 1.028676986694336, "logps/chosen": -1.5234907865524292, "logps/rejected": -4.221794605255127, "loss": 0.5381, "nll_loss": 0.5206161737442017, "rewards/accuracies": 1.0, "rewards/chosen": -0.1523490846157074, "rewards/margins": 0.2698304057121277, "rewards/rejected": -0.4221794903278351, "step": 5743 }, { "epoch": 15.72621492128679, "grad_norm": 3.9985365867614746, "learning_rate": 2.1315068493150685e-07, "log_odds_chosen": 3.261425018310547, "log_odds_ratio": -0.10920076817274094, "logits/chosen": 0.7309930920600891, "logits/rejected": 0.7685424089431763, "logps/chosen": -2.1116724014282227, "logps/rejected": -5.177909851074219, "loss": 0.6985, "nll_loss": 0.6876087188720703, "rewards/accuracies": 1.0, "rewards/chosen": -0.21116723120212555, "rewards/margins": 0.30662375688552856, "rewards/rejected": -0.5177909731864929, "step": 5744 }, { "epoch": 15.728952772073923, "grad_norm": 5.549901962280273, "learning_rate": 2.13013698630137e-07, "log_odds_chosen": 0.8213188648223877, "log_odds_ratio": -0.48761123418807983, "logits/chosen": 1.1390126943588257, "logits/rejected": 1.0776114463806152, "logps/chosen": -1.7807047367095947, "logps/rejected": -2.477804183959961, "loss": 0.559, "nll_loss": 0.5102798342704773, "rewards/accuracies": 0.875, "rewards/chosen": -0.17807048559188843, "rewards/margins": 0.06970994174480438, "rewards/rejected": -0.2477804273366928, "step": 5745 }, { "epoch": 15.731690622861054, "grad_norm": 4.115334510803223, "learning_rate": 2.128767123287671e-07, "log_odds_chosen": 2.180870294570923, "log_odds_ratio": -0.15597140789031982, "logits/chosen": 1.1498548984527588, "logits/rejected": 1.1256612539291382, "logps/chosen": -2.0186970233917236, "logps/rejected": -4.061558723449707, "loss": 0.4746, "nll_loss": 0.45903173089027405, "rewards/accuracies": 1.0, "rewards/chosen": -0.20186971127986908, "rewards/margins": 0.20428615808486938, "rewards/rejected": -0.40615588426589966, "step": 5746 }, { "epoch": 15.734428473648187, "grad_norm": 3.749337673187256, "learning_rate": 2.1273972602739725e-07, "log_odds_chosen": 3.0111501216888428, "log_odds_ratio": -0.1851610541343689, "logits/chosen": 0.9293062686920166, "logits/rejected": 0.965741753578186, "logps/chosen": -2.204737424850464, "logps/rejected": -5.028337478637695, "loss": 0.6997, "nll_loss": 0.6811919808387756, "rewards/accuracies": 1.0, "rewards/chosen": -0.2204737365245819, "rewards/margins": 0.2823600471019745, "rewards/rejected": -0.5028337836265564, "step": 5747 }, { "epoch": 15.737166324435318, "grad_norm": 6.300346374511719, "learning_rate": 2.1260273972602738e-07, "log_odds_chosen": 1.764505386352539, "log_odds_ratio": -0.35961049795150757, "logits/chosen": 1.1879396438598633, "logits/rejected": 1.2485978603363037, "logps/chosen": -2.5489816665649414, "logps/rejected": -4.179158687591553, "loss": 0.6421, "nll_loss": 0.606163501739502, "rewards/accuracies": 0.875, "rewards/chosen": -0.25489819049835205, "rewards/margins": 0.16301770508289337, "rewards/rejected": -0.41791588068008423, "step": 5748 }, { "epoch": 15.739904175222451, "grad_norm": 4.493806838989258, "learning_rate": 2.1246575342465753e-07, "log_odds_chosen": 2.392265796661377, "log_odds_ratio": -0.22067220509052277, "logits/chosen": 0.7577892541885376, "logits/rejected": 0.637282133102417, "logps/chosen": -1.8159334659576416, "logps/rejected": -4.001838684082031, "loss": 0.5807, "nll_loss": 0.5586089491844177, "rewards/accuracies": 0.875, "rewards/chosen": -0.18159334361553192, "rewards/margins": 0.21859055757522583, "rewards/rejected": -0.40018388628959656, "step": 5749 }, { "epoch": 15.742642026009582, "grad_norm": 3.6493916511535645, "learning_rate": 2.1232876712328765e-07, "log_odds_chosen": 2.1255571842193604, "log_odds_ratio": -0.1939035952091217, "logits/chosen": 0.7707773447036743, "logits/rejected": 0.7533155679702759, "logps/chosen": -1.7847115993499756, "logps/rejected": -3.659426689147949, "loss": 0.4384, "nll_loss": 0.4190121293067932, "rewards/accuracies": 1.0, "rewards/chosen": -0.1784711629152298, "rewards/margins": 0.18747149407863617, "rewards/rejected": -0.36594268679618835, "step": 5750 }, { "epoch": 15.745379876796715, "grad_norm": 3.8821089267730713, "learning_rate": 2.121917808219178e-07, "log_odds_chosen": 3.115619421005249, "log_odds_ratio": -0.1130603775382042, "logits/chosen": 1.3334193229675293, "logits/rejected": 1.4143246412277222, "logps/chosen": -2.0486395359039307, "logps/rejected": -4.836335182189941, "loss": 0.5123, "nll_loss": 0.5010099411010742, "rewards/accuracies": 1.0, "rewards/chosen": -0.20486396551132202, "rewards/margins": 0.2787695527076721, "rewards/rejected": -0.48363354802131653, "step": 5751 }, { "epoch": 15.748117727583846, "grad_norm": 3.750014543533325, "learning_rate": 2.1205479452054796e-07, "log_odds_chosen": 1.8645894527435303, "log_odds_ratio": -0.2662413418292999, "logits/chosen": 1.076997995376587, "logits/rejected": 1.0656155347824097, "logps/chosen": -2.0024421215057373, "logps/rejected": -3.7012083530426025, "loss": 0.5053, "nll_loss": 0.4787234961986542, "rewards/accuracies": 1.0, "rewards/chosen": -0.2002442181110382, "rewards/margins": 0.16987662017345428, "rewards/rejected": -0.3701208233833313, "step": 5752 }, { "epoch": 15.75085557837098, "grad_norm": 4.439240455627441, "learning_rate": 2.1191780821917806e-07, "log_odds_chosen": 1.2045059204101562, "log_odds_ratio": -0.4487592577934265, "logits/chosen": 0.9511439800262451, "logits/rejected": 0.9090376496315002, "logps/chosen": -2.253962993621826, "logps/rejected": -3.3656907081604004, "loss": 0.4526, "nll_loss": 0.4076751470565796, "rewards/accuracies": 0.75, "rewards/chosen": -0.2253962904214859, "rewards/margins": 0.11117278039455414, "rewards/rejected": -0.33656907081604004, "step": 5753 }, { "epoch": 15.75359342915811, "grad_norm": 9.282387733459473, "learning_rate": 2.117808219178082e-07, "log_odds_chosen": 2.433189868927002, "log_odds_ratio": -0.48903200030326843, "logits/chosen": 1.2722578048706055, "logits/rejected": 1.2092022895812988, "logps/chosen": -2.1113407611846924, "logps/rejected": -4.326018333435059, "loss": 0.4681, "nll_loss": 0.4192357063293457, "rewards/accuracies": 0.875, "rewards/chosen": -0.21113407611846924, "rewards/margins": 0.2214677333831787, "rewards/rejected": -0.43260183930397034, "step": 5754 }, { "epoch": 15.756331279945243, "grad_norm": 4.431771278381348, "learning_rate": 2.1164383561643833e-07, "log_odds_chosen": 1.6072856187820435, "log_odds_ratio": -0.23139262199401855, "logits/chosen": 0.908864438533783, "logits/rejected": 0.862206757068634, "logps/chosen": -2.0650315284729004, "logps/rejected": -3.497150421142578, "loss": 0.578, "nll_loss": 0.5548200011253357, "rewards/accuracies": 1.0, "rewards/chosen": -0.20650318264961243, "rewards/margins": 0.14321187138557434, "rewards/rejected": -0.34971505403518677, "step": 5755 }, { "epoch": 15.759069130732374, "grad_norm": 3.596510887145996, "learning_rate": 2.115068493150685e-07, "log_odds_chosen": 1.7828257083892822, "log_odds_ratio": -0.2204904407262802, "logits/chosen": 1.3983290195465088, "logits/rejected": 1.389230489730835, "logps/chosen": -2.0964746475219727, "logps/rejected": -3.758754014968872, "loss": 0.5188, "nll_loss": 0.4967755079269409, "rewards/accuracies": 1.0, "rewards/chosen": -0.20964749157428741, "rewards/margins": 0.16622792184352875, "rewards/rejected": -0.37587541341781616, "step": 5756 }, { "epoch": 15.761806981519507, "grad_norm": 3.7935447692871094, "learning_rate": 2.113698630136986e-07, "log_odds_chosen": 4.419286727905273, "log_odds_ratio": -0.11179613322019577, "logits/chosen": 1.239383339881897, "logits/rejected": 1.285728931427002, "logps/chosen": -2.493637800216675, "logps/rejected": -6.785188674926758, "loss": 0.6157, "nll_loss": 0.6045072078704834, "rewards/accuracies": 1.0, "rewards/chosen": -0.24936378002166748, "rewards/margins": 0.4291550815105438, "rewards/rejected": -0.6785188913345337, "step": 5757 }, { "epoch": 15.764544832306639, "grad_norm": 3.9911413192749023, "learning_rate": 2.1123287671232876e-07, "log_odds_chosen": 2.890549659729004, "log_odds_ratio": -0.2552584707736969, "logits/chosen": 1.0105003118515015, "logits/rejected": 1.0214089155197144, "logps/chosen": -2.1354899406433105, "logps/rejected": -4.864892959594727, "loss": 0.6335, "nll_loss": 0.6079602241516113, "rewards/accuracies": 1.0, "rewards/chosen": -0.21354898810386658, "rewards/margins": 0.27294033765792847, "rewards/rejected": -0.48648932576179504, "step": 5758 }, { "epoch": 15.767282683093772, "grad_norm": 4.849108695983887, "learning_rate": 2.1109589041095892e-07, "log_odds_chosen": 2.675661325454712, "log_odds_ratio": -0.20180127024650574, "logits/chosen": 1.0847227573394775, "logits/rejected": 1.152185320854187, "logps/chosen": -2.057353973388672, "logps/rejected": -4.482482433319092, "loss": 0.5945, "nll_loss": 0.5742944478988647, "rewards/accuracies": 0.875, "rewards/chosen": -0.20573538541793823, "rewards/margins": 0.24251288175582886, "rewards/rejected": -0.4482482969760895, "step": 5759 }, { "epoch": 15.770020533880903, "grad_norm": 3.634767770767212, "learning_rate": 2.1095890410958902e-07, "log_odds_chosen": 2.3405606746673584, "log_odds_ratio": -0.159828320145607, "logits/chosen": 1.4196810722351074, "logits/rejected": 1.3666503429412842, "logps/chosen": -1.6018481254577637, "logps/rejected": -3.7321434020996094, "loss": 0.4994, "nll_loss": 0.48339536786079407, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601848304271698, "rewards/margins": 0.21302953362464905, "rewards/rejected": -0.37321436405181885, "step": 5760 }, { "epoch": 15.772758384668036, "grad_norm": 8.954696655273438, "learning_rate": 2.1082191780821917e-07, "log_odds_chosen": 1.0338797569274902, "log_odds_ratio": -0.5049743056297302, "logits/chosen": 1.4485241174697876, "logits/rejected": 1.4922761917114258, "logps/chosen": -3.1433403491973877, "logps/rejected": -4.094054698944092, "loss": 0.6473, "nll_loss": 0.5967580676078796, "rewards/accuracies": 0.875, "rewards/chosen": -0.31433403491973877, "rewards/margins": 0.09507143497467041, "rewards/rejected": -0.40940549969673157, "step": 5761 }, { "epoch": 15.775496235455167, "grad_norm": 4.573916912078857, "learning_rate": 2.106849315068493e-07, "log_odds_chosen": 6.227278709411621, "log_odds_ratio": -0.054172009229660034, "logits/chosen": 1.1468663215637207, "logits/rejected": 1.190422534942627, "logps/chosen": -2.005398750305176, "logps/rejected": -8.039169311523438, "loss": 0.6643, "nll_loss": 0.6588574051856995, "rewards/accuracies": 1.0, "rewards/chosen": -0.20053988695144653, "rewards/margins": 0.6033769845962524, "rewards/rejected": -0.8039169311523438, "step": 5762 }, { "epoch": 15.7782340862423, "grad_norm": 5.75731086730957, "learning_rate": 2.1054794520547945e-07, "log_odds_chosen": 2.8906047344207764, "log_odds_ratio": -0.1809203326702118, "logits/chosen": 1.2177492380142212, "logits/rejected": 1.2568410634994507, "logps/chosen": -2.1038081645965576, "logps/rejected": -4.830354690551758, "loss": 0.5619, "nll_loss": 0.5438047647476196, "rewards/accuracies": 1.0, "rewards/chosen": -0.21038082242012024, "rewards/margins": 0.27265465259552, "rewards/rejected": -0.48303547501564026, "step": 5763 }, { "epoch": 15.780971937029431, "grad_norm": 3.654743194580078, "learning_rate": 2.104109589041096e-07, "log_odds_chosen": 1.9918168783187866, "log_odds_ratio": -0.20994988083839417, "logits/chosen": 0.9650657773017883, "logits/rejected": 0.9688922166824341, "logps/chosen": -2.175930976867676, "logps/rejected": -4.012070655822754, "loss": 0.5902, "nll_loss": 0.5692488551139832, "rewards/accuracies": 0.875, "rewards/chosen": -0.21759313344955444, "rewards/margins": 0.18361398577690125, "rewards/rejected": -0.4012070894241333, "step": 5764 }, { "epoch": 15.783709787816564, "grad_norm": 3.706899881362915, "learning_rate": 2.1027397260273972e-07, "log_odds_chosen": 3.308201789855957, "log_odds_ratio": -0.15815389156341553, "logits/chosen": 1.0818915367126465, "logits/rejected": 1.1105015277862549, "logps/chosen": -1.7953872680664062, "logps/rejected": -4.8472514152526855, "loss": 0.5032, "nll_loss": 0.4874119162559509, "rewards/accuracies": 1.0, "rewards/chosen": -0.17953872680664062, "rewards/margins": 0.3051864504814148, "rewards/rejected": -0.48472514748573303, "step": 5765 }, { "epoch": 15.786447638603697, "grad_norm": 4.104331016540527, "learning_rate": 2.1013698630136985e-07, "log_odds_chosen": 2.819181203842163, "log_odds_ratio": -0.23859107494354248, "logits/chosen": 0.9359825849533081, "logits/rejected": 0.924481987953186, "logps/chosen": -1.3409571647644043, "logps/rejected": -3.8908653259277344, "loss": 0.523, "nll_loss": 0.49915796518325806, "rewards/accuracies": 1.0, "rewards/chosen": -0.13409572839736938, "rewards/margins": 0.254990816116333, "rewards/rejected": -0.3890865445137024, "step": 5766 }, { "epoch": 15.789185489390828, "grad_norm": 4.794327259063721, "learning_rate": 2.0999999999999997e-07, "log_odds_chosen": 1.2348374128341675, "log_odds_ratio": -0.3836248517036438, "logits/chosen": 1.1216323375701904, "logits/rejected": 1.0845928192138672, "logps/chosen": -1.7544991970062256, "logps/rejected": -2.871725082397461, "loss": 0.5042, "nll_loss": 0.46581244468688965, "rewards/accuracies": 0.75, "rewards/chosen": -0.1754499077796936, "rewards/margins": 0.11172255873680115, "rewards/rejected": -0.28717246651649475, "step": 5767 }, { "epoch": 15.791923340177961, "grad_norm": 7.709228038787842, "learning_rate": 2.0986301369863013e-07, "log_odds_chosen": 3.4245424270629883, "log_odds_ratio": -0.20048250257968903, "logits/chosen": 1.0481195449829102, "logits/rejected": 0.9795519113540649, "logps/chosen": -2.168673038482666, "logps/rejected": -5.452841281890869, "loss": 0.6017, "nll_loss": 0.581620454788208, "rewards/accuracies": 0.875, "rewards/chosen": -0.21686731278896332, "rewards/margins": 0.3284168243408203, "rewards/rejected": -0.5452841520309448, "step": 5768 }, { "epoch": 15.794661190965092, "grad_norm": 3.927842378616333, "learning_rate": 2.0972602739726025e-07, "log_odds_chosen": 2.8427445888519287, "log_odds_ratio": -0.17387622594833374, "logits/chosen": 1.08354651927948, "logits/rejected": 1.1180524826049805, "logps/chosen": -1.4985311031341553, "logps/rejected": -4.053820610046387, "loss": 0.4748, "nll_loss": 0.4573654234409332, "rewards/accuracies": 1.0, "rewards/chosen": -0.14985311031341553, "rewards/margins": 0.25552898645401, "rewards/rejected": -0.40538206696510315, "step": 5769 }, { "epoch": 15.797399041752225, "grad_norm": 3.5690314769744873, "learning_rate": 2.095890410958904e-07, "log_odds_chosen": 2.9310946464538574, "log_odds_ratio": -0.19845440983772278, "logits/chosen": 1.1091121435165405, "logits/rejected": 1.063893437385559, "logps/chosen": -2.233388662338257, "logps/rejected": -5.037362575531006, "loss": 0.558, "nll_loss": 0.5381502509117126, "rewards/accuracies": 1.0, "rewards/chosen": -0.22333887219429016, "rewards/margins": 0.28039735555648804, "rewards/rejected": -0.5037361979484558, "step": 5770 }, { "epoch": 15.800136892539356, "grad_norm": 3.628741979598999, "learning_rate": 2.0945205479452056e-07, "log_odds_chosen": 2.5287275314331055, "log_odds_ratio": -0.2635017931461334, "logits/chosen": 1.0556988716125488, "logits/rejected": 1.0782668590545654, "logps/chosen": -1.7844901084899902, "logps/rejected": -4.093287944793701, "loss": 0.4984, "nll_loss": 0.47205203771591187, "rewards/accuracies": 1.0, "rewards/chosen": -0.17844900488853455, "rewards/margins": 0.2308797836303711, "rewards/rejected": -0.40932878851890564, "step": 5771 }, { "epoch": 15.80287474332649, "grad_norm": 16.13105583190918, "learning_rate": 2.0931506849315068e-07, "log_odds_chosen": -0.05797809362411499, "log_odds_ratio": -1.227480411529541, "logits/chosen": 0.6409262418746948, "logits/rejected": 0.6061349511146545, "logps/chosen": -2.7817108631134033, "logps/rejected": -2.657186269760132, "loss": 0.8233, "nll_loss": 0.7005430459976196, "rewards/accuracies": 0.625, "rewards/chosen": -0.2781710624694824, "rewards/margins": -0.012452440336346626, "rewards/rejected": -0.26571863889694214, "step": 5772 }, { "epoch": 15.80561259411362, "grad_norm": 3.920156240463257, "learning_rate": 2.091780821917808e-07, "log_odds_chosen": 2.409505844116211, "log_odds_ratio": -0.252688467502594, "logits/chosen": 1.064085841178894, "logits/rejected": 0.9230707883834839, "logps/chosen": -1.2456202507019043, "logps/rejected": -3.4057278633117676, "loss": 0.4738, "nll_loss": 0.4485609531402588, "rewards/accuracies": 1.0, "rewards/chosen": -0.12456202507019043, "rewards/margins": 0.21601074934005737, "rewards/rejected": -0.3405727744102478, "step": 5773 }, { "epoch": 15.808350444900753, "grad_norm": 4.460757255554199, "learning_rate": 2.0904109589041093e-07, "log_odds_chosen": 1.9433687925338745, "log_odds_ratio": -0.29636356234550476, "logits/chosen": 1.2906837463378906, "logits/rejected": 1.2921706438064575, "logps/chosen": -2.019529342651367, "logps/rejected": -3.8036630153656006, "loss": 0.5062, "nll_loss": 0.4765336513519287, "rewards/accuracies": 0.875, "rewards/chosen": -0.20195291936397552, "rewards/margins": 0.17841336131095886, "rewards/rejected": -0.38036632537841797, "step": 5774 }, { "epoch": 15.811088295687885, "grad_norm": 3.2885265350341797, "learning_rate": 2.0890410958904109e-07, "log_odds_chosen": 2.3028836250305176, "log_odds_ratio": -0.2462315559387207, "logits/chosen": 1.2032440900802612, "logits/rejected": 1.161726951599121, "logps/chosen": -1.5417335033416748, "logps/rejected": -3.5718536376953125, "loss": 0.5032, "nll_loss": 0.47854191064834595, "rewards/accuracies": 0.875, "rewards/chosen": -0.154173344373703, "rewards/margins": 0.20301201939582825, "rewards/rejected": -0.35718536376953125, "step": 5775 }, { "epoch": 15.813826146475018, "grad_norm": 4.501725673675537, "learning_rate": 2.087671232876712e-07, "log_odds_chosen": 2.2405450344085693, "log_odds_ratio": -0.2493852972984314, "logits/chosen": 0.929407000541687, "logits/rejected": 0.8277795314788818, "logps/chosen": -1.6211353540420532, "logps/rejected": -3.627648115158081, "loss": 0.5124, "nll_loss": 0.48744621872901917, "rewards/accuracies": 0.875, "rewards/chosen": -0.16211354732513428, "rewards/margins": 0.20065127313137054, "rewards/rejected": -0.362764835357666, "step": 5776 }, { "epoch": 15.816563997262149, "grad_norm": 3.4493746757507324, "learning_rate": 2.0863013698630136e-07, "log_odds_chosen": 2.0470662117004395, "log_odds_ratio": -0.2389279007911682, "logits/chosen": 1.1225069761276245, "logits/rejected": 1.1013808250427246, "logps/chosen": -1.1710671186447144, "logps/rejected": -2.930122137069702, "loss": 0.4526, "nll_loss": 0.4286630153656006, "rewards/accuracies": 1.0, "rewards/chosen": -0.11710670590400696, "rewards/margins": 0.1759054958820343, "rewards/rejected": -0.29301223158836365, "step": 5777 }, { "epoch": 15.819301848049282, "grad_norm": 3.6673507690429688, "learning_rate": 2.0849315068493152e-07, "log_odds_chosen": 2.349883794784546, "log_odds_ratio": -0.24740135669708252, "logits/chosen": 1.2943499088287354, "logits/rejected": 1.3075305223464966, "logps/chosen": -1.6803224086761475, "logps/rejected": -3.7812752723693848, "loss": 0.4971, "nll_loss": 0.4723380208015442, "rewards/accuracies": 0.875, "rewards/chosen": -0.16803224384784698, "rewards/margins": 0.21009525656700134, "rewards/rejected": -0.3781275153160095, "step": 5778 }, { "epoch": 15.822039698836413, "grad_norm": 4.005772590637207, "learning_rate": 2.0835616438356164e-07, "log_odds_chosen": 3.215195655822754, "log_odds_ratio": -0.21542471647262573, "logits/chosen": 0.8734560012817383, "logits/rejected": 0.9022507667541504, "logps/chosen": -2.3772401809692383, "logps/rejected": -5.445613384246826, "loss": 0.6513, "nll_loss": 0.6297184228897095, "rewards/accuracies": 1.0, "rewards/chosen": -0.23772402107715607, "rewards/margins": 0.3068373501300812, "rewards/rejected": -0.5445613265037537, "step": 5779 }, { "epoch": 15.824777549623546, "grad_norm": 4.503152370452881, "learning_rate": 2.0821917808219177e-07, "log_odds_chosen": 0.971104621887207, "log_odds_ratio": -0.4493948519229889, "logits/chosen": 1.224142074584961, "logits/rejected": 1.2298475503921509, "logps/chosen": -1.318861961364746, "logps/rejected": -2.000162363052368, "loss": 0.4009, "nll_loss": 0.3559609055519104, "rewards/accuracies": 0.875, "rewards/chosen": -0.13188618421554565, "rewards/margins": 0.06813004612922668, "rewards/rejected": -0.20001623034477234, "step": 5780 }, { "epoch": 15.827515400410677, "grad_norm": 3.443951368331909, "learning_rate": 2.080821917808219e-07, "log_odds_chosen": 3.2437195777893066, "log_odds_ratio": -0.13355955481529236, "logits/chosen": 1.1933903694152832, "logits/rejected": 1.143916130065918, "logps/chosen": -1.8914783000946045, "logps/rejected": -4.9394850730896, "loss": 0.5751, "nll_loss": 0.5617491602897644, "rewards/accuracies": 1.0, "rewards/chosen": -0.18914783000946045, "rewards/margins": 0.3048006594181061, "rewards/rejected": -0.49394845962524414, "step": 5781 }, { "epoch": 15.83025325119781, "grad_norm": 4.1102705001831055, "learning_rate": 2.0794520547945204e-07, "log_odds_chosen": 2.8485734462738037, "log_odds_ratio": -0.15391656756401062, "logits/chosen": 0.9811633825302124, "logits/rejected": 0.9955649971961975, "logps/chosen": -1.8700857162475586, "logps/rejected": -4.5541911125183105, "loss": 0.5771, "nll_loss": 0.5617247819900513, "rewards/accuracies": 1.0, "rewards/chosen": -0.1870085746049881, "rewards/margins": 0.2684105634689331, "rewards/rejected": -0.45541912317276, "step": 5782 }, { "epoch": 15.832991101984941, "grad_norm": 3.716688394546509, "learning_rate": 2.078082191780822e-07, "log_odds_chosen": 2.410377264022827, "log_odds_ratio": -0.25139862298965454, "logits/chosen": 0.9758068323135376, "logits/rejected": 0.9274868965148926, "logps/chosen": -1.603676438331604, "logps/rejected": -3.83955454826355, "loss": 0.4417, "nll_loss": 0.4165106415748596, "rewards/accuracies": 0.875, "rewards/chosen": -0.16036763787269592, "rewards/margins": 0.22358781099319458, "rewards/rejected": -0.3839554786682129, "step": 5783 }, { "epoch": 15.835728952772074, "grad_norm": 3.574312448501587, "learning_rate": 2.0767123287671232e-07, "log_odds_chosen": 3.719653606414795, "log_odds_ratio": -0.17193014919757843, "logits/chosen": 0.9515956044197083, "logits/rejected": 1.0291625261306763, "logps/chosen": -1.9425939321517944, "logps/rejected": -5.517917633056641, "loss": 0.6691, "nll_loss": 0.6519113183021545, "rewards/accuracies": 1.0, "rewards/chosen": -0.1942594051361084, "rewards/margins": 0.3575323224067688, "rewards/rejected": -0.5517917275428772, "step": 5784 }, { "epoch": 15.838466803559205, "grad_norm": 6.6916184425354, "learning_rate": 2.0753424657534247e-07, "log_odds_chosen": 2.1294689178466797, "log_odds_ratio": -0.25331592559814453, "logits/chosen": 1.3601160049438477, "logits/rejected": 1.3015189170837402, "logps/chosen": -2.4333996772766113, "logps/rejected": -4.397472381591797, "loss": 0.5537, "nll_loss": 0.5283684730529785, "rewards/accuracies": 0.875, "rewards/chosen": -0.24333995580673218, "rewards/margins": 0.1964072734117508, "rewards/rejected": -0.43974724411964417, "step": 5785 }, { "epoch": 15.841204654346338, "grad_norm": 3.5748026371002197, "learning_rate": 2.0739726027397257e-07, "log_odds_chosen": 2.798398494720459, "log_odds_ratio": -0.14694710075855255, "logits/chosen": 1.288088083267212, "logits/rejected": 1.25313138961792, "logps/chosen": -1.7710254192352295, "logps/rejected": -4.380016803741455, "loss": 0.5442, "nll_loss": 0.5294668674468994, "rewards/accuracies": 1.0, "rewards/chosen": -0.17710255086421967, "rewards/margins": 0.2608991265296936, "rewards/rejected": -0.43800169229507446, "step": 5786 }, { "epoch": 15.84394250513347, "grad_norm": 4.085709095001221, "learning_rate": 2.0726027397260272e-07, "log_odds_chosen": 2.630753517150879, "log_odds_ratio": -0.2690476179122925, "logits/chosen": 1.0454142093658447, "logits/rejected": 1.019490361213684, "logps/chosen": -2.1250250339508057, "logps/rejected": -4.610104560852051, "loss": 0.6056, "nll_loss": 0.5786757469177246, "rewards/accuracies": 0.875, "rewards/chosen": -0.21250252425670624, "rewards/margins": 0.24850799143314362, "rewards/rejected": -0.46101051568984985, "step": 5787 }, { "epoch": 15.846680355920602, "grad_norm": 3.8938541412353516, "learning_rate": 2.0712328767123285e-07, "log_odds_chosen": 2.3895866870880127, "log_odds_ratio": -0.28356799483299255, "logits/chosen": 1.1983542442321777, "logits/rejected": 1.243200659751892, "logps/chosen": -2.379441738128662, "logps/rejected": -4.675388336181641, "loss": 0.5492, "nll_loss": 0.5208865404129028, "rewards/accuracies": 1.0, "rewards/chosen": -0.23794415593147278, "rewards/margins": 0.22959467768669128, "rewards/rejected": -0.46753886342048645, "step": 5788 }, { "epoch": 15.849418206707734, "grad_norm": 3.5964012145996094, "learning_rate": 2.06986301369863e-07, "log_odds_chosen": 2.1491658687591553, "log_odds_ratio": -0.24436894059181213, "logits/chosen": 1.0398794412612915, "logits/rejected": 0.9876034259796143, "logps/chosen": -1.5025880336761475, "logps/rejected": -3.460265636444092, "loss": 0.4817, "nll_loss": 0.45729565620422363, "rewards/accuracies": 1.0, "rewards/chosen": -0.15025880932807922, "rewards/margins": 0.19576779007911682, "rewards/rejected": -0.34602659940719604, "step": 5789 }, { "epoch": 15.852156057494867, "grad_norm": 14.822063446044922, "learning_rate": 2.0684931506849315e-07, "log_odds_chosen": 3.6225059032440186, "log_odds_ratio": -0.7703412771224976, "logits/chosen": 0.9829773902893066, "logits/rejected": 0.94086092710495, "logps/chosen": -3.120239496231079, "logps/rejected": -6.501180648803711, "loss": 0.7013, "nll_loss": 0.6242728233337402, "rewards/accuracies": 0.75, "rewards/chosen": -0.31202393770217896, "rewards/margins": 0.3380940854549408, "rewards/rejected": -0.6501180529594421, "step": 5790 }, { "epoch": 15.854893908281998, "grad_norm": 3.1558175086975098, "learning_rate": 2.0671232876712328e-07, "log_odds_chosen": 4.354547023773193, "log_odds_ratio": -0.09588257968425751, "logits/chosen": 1.170182228088379, "logits/rejected": 1.2422096729278564, "logps/chosen": -2.5967087745666504, "logps/rejected": -6.6531267166137695, "loss": 0.7307, "nll_loss": 0.7211252450942993, "rewards/accuracies": 0.875, "rewards/chosen": -0.25967085361480713, "rewards/margins": 0.4056418836116791, "rewards/rejected": -0.6653127670288086, "step": 5791 }, { "epoch": 15.85763175906913, "grad_norm": 3.488133192062378, "learning_rate": 2.0657534246575343e-07, "log_odds_chosen": 2.154738426208496, "log_odds_ratio": -0.20354658365249634, "logits/chosen": 1.2983442544937134, "logits/rejected": 1.2400676012039185, "logps/chosen": -1.5639512538909912, "logps/rejected": -3.494845151901245, "loss": 0.4702, "nll_loss": 0.4498831629753113, "rewards/accuracies": 0.875, "rewards/chosen": -0.15639513731002808, "rewards/margins": 0.19308939576148987, "rewards/rejected": -0.34948450326919556, "step": 5792 }, { "epoch": 15.860369609856264, "grad_norm": 3.6750004291534424, "learning_rate": 2.0643835616438353e-07, "log_odds_chosen": 5.266597747802734, "log_odds_ratio": -0.08599122613668442, "logits/chosen": 1.215585470199585, "logits/rejected": 1.117781400680542, "logps/chosen": -2.0298354625701904, "logps/rejected": -7.107518196105957, "loss": 0.6271, "nll_loss": 0.6184512972831726, "rewards/accuracies": 1.0, "rewards/chosen": -0.202983558177948, "rewards/margins": 0.5077682733535767, "rewards/rejected": -0.7107518315315247, "step": 5793 }, { "epoch": 15.863107460643395, "grad_norm": 3.418933629989624, "learning_rate": 2.0630136986301368e-07, "log_odds_chosen": 3.8285927772521973, "log_odds_ratio": -0.0639081597328186, "logits/chosen": 1.2506879568099976, "logits/rejected": 1.2695053815841675, "logps/chosen": -1.6579244136810303, "logps/rejected": -5.159037113189697, "loss": 0.4468, "nll_loss": 0.4403753876686096, "rewards/accuracies": 1.0, "rewards/chosen": -0.16579243540763855, "rewards/margins": 0.3501112759113312, "rewards/rejected": -0.5159037113189697, "step": 5794 }, { "epoch": 15.865845311430528, "grad_norm": 3.7318270206451416, "learning_rate": 2.0616438356164384e-07, "log_odds_chosen": 2.8355143070220947, "log_odds_ratio": -0.14416652917861938, "logits/chosen": 1.1085487604141235, "logits/rejected": 1.1406824588775635, "logps/chosen": -2.1212499141693115, "logps/rejected": -4.812429428100586, "loss": 0.6701, "nll_loss": 0.6556816101074219, "rewards/accuracies": 1.0, "rewards/chosen": -0.2121250033378601, "rewards/margins": 0.26911792159080505, "rewards/rejected": -0.4812428951263428, "step": 5795 }, { "epoch": 15.868583162217659, "grad_norm": 4.012993335723877, "learning_rate": 2.0602739726027396e-07, "log_odds_chosen": 1.4674930572509766, "log_odds_ratio": -0.2351953685283661, "logits/chosen": 1.5004758834838867, "logits/rejected": 1.5273391008377075, "logps/chosen": -2.087282419204712, "logps/rejected": -3.387014150619507, "loss": 0.4764, "nll_loss": 0.4528646171092987, "rewards/accuracies": 1.0, "rewards/chosen": -0.20872825384140015, "rewards/margins": 0.12997318804264069, "rewards/rejected": -0.33870142698287964, "step": 5796 }, { "epoch": 15.871321013004792, "grad_norm": 3.4661858081817627, "learning_rate": 2.0589041095890411e-07, "log_odds_chosen": 4.094385147094727, "log_odds_ratio": -0.1454220563173294, "logits/chosen": 1.132027268409729, "logits/rejected": 1.1659146547317505, "logps/chosen": -1.3998162746429443, "logps/rejected": -5.172974109649658, "loss": 0.5175, "nll_loss": 0.5029477477073669, "rewards/accuracies": 1.0, "rewards/chosen": -0.13998162746429443, "rewards/margins": 0.3773157596588135, "rewards/rejected": -0.5172973871231079, "step": 5797 }, { "epoch": 15.874058863791923, "grad_norm": 3.4795405864715576, "learning_rate": 2.0575342465753424e-07, "log_odds_chosen": 5.226686477661133, "log_odds_ratio": -0.0750299021601677, "logits/chosen": 1.082331895828247, "logits/rejected": 1.129996418952942, "logps/chosen": -2.4224135875701904, "logps/rejected": -7.541356086730957, "loss": 0.6521, "nll_loss": 0.6446340084075928, "rewards/accuracies": 1.0, "rewards/chosen": -0.24224135279655457, "rewards/margins": 0.5118942856788635, "rewards/rejected": -0.7541356086730957, "step": 5798 }, { "epoch": 15.876796714579056, "grad_norm": 6.932394504547119, "learning_rate": 2.056164383561644e-07, "log_odds_chosen": 0.9898879528045654, "log_odds_ratio": -0.614043116569519, "logits/chosen": 1.149244785308838, "logits/rejected": 1.1868302822113037, "logps/chosen": -2.521481513977051, "logps/rejected": -3.404569149017334, "loss": 0.6663, "nll_loss": 0.6048803329467773, "rewards/accuracies": 0.875, "rewards/chosen": -0.2521481513977051, "rewards/margins": 0.08830876648426056, "rewards/rejected": -0.34045693278312683, "step": 5799 }, { "epoch": 15.879534565366187, "grad_norm": 3.2858049869537354, "learning_rate": 2.054794520547945e-07, "log_odds_chosen": 3.8912158012390137, "log_odds_ratio": -0.10997646301984787, "logits/chosen": 0.7623561024665833, "logits/rejected": 0.7206090092658997, "logps/chosen": -1.030466079711914, "logps/rejected": -4.472716331481934, "loss": 0.5674, "nll_loss": 0.5563874840736389, "rewards/accuracies": 1.0, "rewards/chosen": -0.10304659605026245, "rewards/margins": 0.34422507882118225, "rewards/rejected": -0.4472716450691223, "step": 5800 }, { "epoch": 15.88227241615332, "grad_norm": 6.007054805755615, "learning_rate": 2.0534246575342464e-07, "log_odds_chosen": 1.1991610527038574, "log_odds_ratio": -0.4794897437095642, "logits/chosen": 1.197106122970581, "logits/rejected": 1.2174655199050903, "logps/chosen": -2.2758896350860596, "logps/rejected": -3.4016575813293457, "loss": 0.5946, "nll_loss": 0.5466064214706421, "rewards/accuracies": 0.75, "rewards/chosen": -0.22758899629116058, "rewards/margins": 0.11257676780223846, "rewards/rejected": -0.34016576409339905, "step": 5801 }, { "epoch": 15.885010266940451, "grad_norm": 3.7991514205932617, "learning_rate": 2.052054794520548e-07, "log_odds_chosen": 1.8410987854003906, "log_odds_ratio": -0.22218288481235504, "logits/chosen": 1.1739498376846313, "logits/rejected": 1.1918272972106934, "logps/chosen": -1.5142638683319092, "logps/rejected": -3.0741820335388184, "loss": 0.4528, "nll_loss": 0.4305564761161804, "rewards/accuracies": 1.0, "rewards/chosen": -0.15142638981342316, "rewards/margins": 0.1559918075799942, "rewards/rejected": -0.30741819739341736, "step": 5802 }, { "epoch": 15.887748117727584, "grad_norm": 3.516916036605835, "learning_rate": 2.0506849315068492e-07, "log_odds_chosen": 3.5298709869384766, "log_odds_ratio": -0.16221243143081665, "logits/chosen": 0.9521753787994385, "logits/rejected": 0.9332308769226074, "logps/chosen": -1.8185300827026367, "logps/rejected": -5.167473316192627, "loss": 0.6184, "nll_loss": 0.602189302444458, "rewards/accuracies": 0.875, "rewards/chosen": -0.1818530261516571, "rewards/margins": 0.3348943293094635, "rewards/rejected": -0.5167473554611206, "step": 5803 }, { "epoch": 15.890485968514716, "grad_norm": 3.691237688064575, "learning_rate": 2.0493150684931507e-07, "log_odds_chosen": 3.0349621772766113, "log_odds_ratio": -0.18719246983528137, "logits/chosen": 1.0334110260009766, "logits/rejected": 1.07136869430542, "logps/chosen": -2.4124755859375, "logps/rejected": -5.038867950439453, "loss": 0.5922, "nll_loss": 0.5734788775444031, "rewards/accuracies": 0.875, "rewards/chosen": -0.24124756455421448, "rewards/margins": 0.26263925433158875, "rewards/rejected": -0.5038868188858032, "step": 5804 }, { "epoch": 15.893223819301848, "grad_norm": 5.4925217628479, "learning_rate": 2.047945205479452e-07, "log_odds_chosen": 2.4931187629699707, "log_odds_ratio": -0.4590103328227997, "logits/chosen": 1.1427828073501587, "logits/rejected": 1.0921742916107178, "logps/chosen": -2.7218117713928223, "logps/rejected": -5.1182451248168945, "loss": 0.6592, "nll_loss": 0.6133370399475098, "rewards/accuracies": 0.875, "rewards/chosen": -0.2721811830997467, "rewards/margins": 0.23964336514472961, "rewards/rejected": -0.5118245482444763, "step": 5805 }, { "epoch": 15.89596167008898, "grad_norm": 3.6490259170532227, "learning_rate": 2.0465753424657532e-07, "log_odds_chosen": 1.3015148639678955, "log_odds_ratio": -0.45484989881515503, "logits/chosen": 0.8938323259353638, "logits/rejected": 0.9685956239700317, "logps/chosen": -1.537083625793457, "logps/rejected": -2.723606586456299, "loss": 0.5135, "nll_loss": 0.4680097997188568, "rewards/accuracies": 0.75, "rewards/chosen": -0.15370836853981018, "rewards/margins": 0.11865229904651642, "rewards/rejected": -0.2723606526851654, "step": 5806 }, { "epoch": 15.898699520876113, "grad_norm": 3.844564437866211, "learning_rate": 2.0452054794520545e-07, "log_odds_chosen": 2.4214601516723633, "log_odds_ratio": -0.22795957326889038, "logits/chosen": 0.9529621601104736, "logits/rejected": 0.8867882490158081, "logps/chosen": -1.8588981628417969, "logps/rejected": -4.114982604980469, "loss": 0.5201, "nll_loss": 0.49734944105148315, "rewards/accuracies": 1.0, "rewards/chosen": -0.1858898103237152, "rewards/margins": 0.2256084680557251, "rewards/rejected": -0.4114983081817627, "step": 5807 }, { "epoch": 15.901437371663244, "grad_norm": 3.5083794593811035, "learning_rate": 2.043835616438356e-07, "log_odds_chosen": 2.481093645095825, "log_odds_ratio": -0.16663914918899536, "logits/chosen": 0.9375978112220764, "logits/rejected": 0.9528273940086365, "logps/chosen": -1.7105116844177246, "logps/rejected": -4.019308090209961, "loss": 0.5528, "nll_loss": 0.5361754298210144, "rewards/accuracies": 1.0, "rewards/chosen": -0.17105118930339813, "rewards/margins": 0.23087963461875916, "rewards/rejected": -0.4019308090209961, "step": 5808 }, { "epoch": 15.904175222450377, "grad_norm": 7.361148357391357, "learning_rate": 2.0424657534246575e-07, "log_odds_chosen": 2.3699493408203125, "log_odds_ratio": -0.21921128034591675, "logits/chosen": 1.1142301559448242, "logits/rejected": 1.1456068754196167, "logps/chosen": -2.044644832611084, "logps/rejected": -4.16886043548584, "loss": 0.5963, "nll_loss": 0.5744156837463379, "rewards/accuracies": 1.0, "rewards/chosen": -0.20446446537971497, "rewards/margins": 0.21242162585258484, "rewards/rejected": -0.4168861210346222, "step": 5809 }, { "epoch": 15.906913073237508, "grad_norm": 3.5262393951416016, "learning_rate": 2.0410958904109588e-07, "log_odds_chosen": 2.9700772762298584, "log_odds_ratio": -0.2660496234893799, "logits/chosen": 0.951759934425354, "logits/rejected": 0.9185370802879333, "logps/chosen": -1.5675098896026611, "logps/rejected": -4.149460792541504, "loss": 0.4571, "nll_loss": 0.4305194616317749, "rewards/accuracies": 1.0, "rewards/chosen": -0.15675099194049835, "rewards/margins": 0.25819510221481323, "rewards/rejected": -0.4149460792541504, "step": 5810 }, { "epoch": 15.90965092402464, "grad_norm": 3.254772424697876, "learning_rate": 2.0397260273972603e-07, "log_odds_chosen": 5.278879642486572, "log_odds_ratio": -0.026943014934659004, "logits/chosen": 1.4605636596679688, "logits/rejected": 1.5233345031738281, "logps/chosen": -1.798856496810913, "logps/rejected": -6.618385314941406, "loss": 0.4495, "nll_loss": 0.44684281945228577, "rewards/accuracies": 1.0, "rewards/chosen": -0.1798856556415558, "rewards/margins": 0.4819529056549072, "rewards/rejected": -0.6618385314941406, "step": 5811 }, { "epoch": 15.912388774811772, "grad_norm": 4.320668697357178, "learning_rate": 2.0383561643835616e-07, "log_odds_chosen": 1.5712558031082153, "log_odds_ratio": -0.3183933198451996, "logits/chosen": 1.0774917602539062, "logits/rejected": 1.0156033039093018, "logps/chosen": -1.9039541482925415, "logps/rejected": -3.3622512817382812, "loss": 0.5529, "nll_loss": 0.5210899114608765, "rewards/accuracies": 0.875, "rewards/chosen": -0.19039541482925415, "rewards/margins": 0.1458297222852707, "rewards/rejected": -0.33622515201568604, "step": 5812 }, { "epoch": 15.915126625598905, "grad_norm": 5.564560413360596, "learning_rate": 2.0369863013698628e-07, "log_odds_chosen": 2.0545811653137207, "log_odds_ratio": -0.6148978471755981, "logits/chosen": 1.163249135017395, "logits/rejected": 1.2061123847961426, "logps/chosen": -2.9581103324890137, "logps/rejected": -4.929990768432617, "loss": 0.6575, "nll_loss": 0.5960497856140137, "rewards/accuracies": 0.75, "rewards/chosen": -0.2958110570907593, "rewards/margins": 0.19718803465366364, "rewards/rejected": -0.4929990768432617, "step": 5813 }, { "epoch": 15.917864476386036, "grad_norm": 3.7114830017089844, "learning_rate": 2.0356164383561643e-07, "log_odds_chosen": 3.4235568046569824, "log_odds_ratio": -0.10822227597236633, "logits/chosen": 0.8669543862342834, "logits/rejected": 0.7967237830162048, "logps/chosen": -1.2132210731506348, "logps/rejected": -4.303327560424805, "loss": 0.3967, "nll_loss": 0.38584280014038086, "rewards/accuracies": 1.0, "rewards/chosen": -0.12132211029529572, "rewards/margins": 0.3090106248855591, "rewards/rejected": -0.430332750082016, "step": 5814 }, { "epoch": 15.92060232717317, "grad_norm": 4.559023380279541, "learning_rate": 2.0342465753424656e-07, "log_odds_chosen": 1.8623288869857788, "log_odds_ratio": -0.2511143982410431, "logits/chosen": 1.2564305067062378, "logits/rejected": 1.1838319301605225, "logps/chosen": -1.5037537813186646, "logps/rejected": -3.042642116546631, "loss": 0.4173, "nll_loss": 0.39222073554992676, "rewards/accuracies": 1.0, "rewards/chosen": -0.1503753960132599, "rewards/margins": 0.15388885140419006, "rewards/rejected": -0.30426424741744995, "step": 5815 }, { "epoch": 15.923340177960302, "grad_norm": 4.1524882316589355, "learning_rate": 2.032876712328767e-07, "log_odds_chosen": 1.9223265647888184, "log_odds_ratio": -0.2660759389400482, "logits/chosen": 0.9143543243408203, "logits/rejected": 0.9957836866378784, "logps/chosen": -2.1201882362365723, "logps/rejected": -3.856505870819092, "loss": 0.5754, "nll_loss": 0.5487957000732422, "rewards/accuracies": 1.0, "rewards/chosen": -0.21201883256435394, "rewards/margins": 0.17363177239894867, "rewards/rejected": -0.3856506049633026, "step": 5816 }, { "epoch": 15.926078028747433, "grad_norm": 3.5691826343536377, "learning_rate": 2.0315068493150684e-07, "log_odds_chosen": 3.144716501235962, "log_odds_ratio": -0.0814933031797409, "logits/chosen": 0.9497798681259155, "logits/rejected": 0.9094140529632568, "logps/chosen": -1.9051798582077026, "logps/rejected": -4.856896877288818, "loss": 0.5038, "nll_loss": 0.4956570863723755, "rewards/accuracies": 1.0, "rewards/chosen": -0.19051799178123474, "rewards/margins": 0.29517170786857605, "rewards/rejected": -0.4856896996498108, "step": 5817 }, { "epoch": 15.928815879534564, "grad_norm": 3.296110153198242, "learning_rate": 2.03013698630137e-07, "log_odds_chosen": 2.9609897136688232, "log_odds_ratio": -0.13958515226840973, "logits/chosen": 1.0278470516204834, "logits/rejected": 1.028164267539978, "logps/chosen": -1.9098789691925049, "logps/rejected": -4.7005534172058105, "loss": 0.5146, "nll_loss": 0.5005965232849121, "rewards/accuracies": 1.0, "rewards/chosen": -0.19098789989948273, "rewards/margins": 0.2790674865245819, "rewards/rejected": -0.47005537152290344, "step": 5818 }, { "epoch": 15.931553730321697, "grad_norm": 3.508436441421509, "learning_rate": 2.0287671232876711e-07, "log_odds_chosen": 3.944373607635498, "log_odds_ratio": -0.16250288486480713, "logits/chosen": 1.457905888557434, "logits/rejected": 1.5177066326141357, "logps/chosen": -1.791204571723938, "logps/rejected": -5.559833526611328, "loss": 0.5545, "nll_loss": 0.5382241606712341, "rewards/accuracies": 0.875, "rewards/chosen": -0.17912045121192932, "rewards/margins": 0.37686288356781006, "rewards/rejected": -0.5559833645820618, "step": 5819 }, { "epoch": 15.93429158110883, "grad_norm": 3.6257827281951904, "learning_rate": 2.0273972602739724e-07, "log_odds_chosen": 2.6949822902679443, "log_odds_ratio": -0.17288997769355774, "logits/chosen": 1.340893268585205, "logits/rejected": 1.3390583992004395, "logps/chosen": -2.122304677963257, "logps/rejected": -4.663741111755371, "loss": 0.5342, "nll_loss": 0.5168805122375488, "rewards/accuracies": 1.0, "rewards/chosen": -0.21223047375679016, "rewards/margins": 0.2541436553001404, "rewards/rejected": -0.46637409925460815, "step": 5820 }, { "epoch": 15.937029431895962, "grad_norm": 3.5012214183807373, "learning_rate": 2.026027397260274e-07, "log_odds_chosen": 5.860872268676758, "log_odds_ratio": -0.1165112778544426, "logits/chosen": 1.0591142177581787, "logits/rejected": 1.0598021745681763, "logps/chosen": -1.4794484376907349, "logps/rejected": -7.061441898345947, "loss": 0.6305, "nll_loss": 0.6188037395477295, "rewards/accuracies": 1.0, "rewards/chosen": -0.1479448676109314, "rewards/margins": 0.5581992864608765, "rewards/rejected": -0.7061442136764526, "step": 5821 }, { "epoch": 15.939767282683095, "grad_norm": 3.7303414344787598, "learning_rate": 2.0246575342465752e-07, "log_odds_chosen": 3.0201189517974854, "log_odds_ratio": -0.34838902950286865, "logits/chosen": 1.1874938011169434, "logits/rejected": 1.1941602230072021, "logps/chosen": -1.8720920085906982, "logps/rejected": -4.802359580993652, "loss": 0.6201, "nll_loss": 0.5852832198143005, "rewards/accuracies": 0.75, "rewards/chosen": -0.18720920383930206, "rewards/margins": 0.29302680492401123, "rewards/rejected": -0.4802359938621521, "step": 5822 }, { "epoch": 15.942505133470226, "grad_norm": 3.946596145629883, "learning_rate": 2.0232876712328767e-07, "log_odds_chosen": 3.2313284873962402, "log_odds_ratio": -0.1584990918636322, "logits/chosen": 0.9602876901626587, "logits/rejected": 0.9328349828720093, "logps/chosen": -2.035407543182373, "logps/rejected": -5.088801860809326, "loss": 0.566, "nll_loss": 0.5501735210418701, "rewards/accuracies": 1.0, "rewards/chosen": -0.20354077219963074, "rewards/margins": 0.3053394556045532, "rewards/rejected": -0.5088801383972168, "step": 5823 }, { "epoch": 15.945242984257359, "grad_norm": 3.618337631225586, "learning_rate": 2.021917808219178e-07, "log_odds_chosen": 2.4965243339538574, "log_odds_ratio": -0.2015840858221054, "logits/chosen": 0.919264554977417, "logits/rejected": 0.9769479036331177, "logps/chosen": -1.6643807888031006, "logps/rejected": -3.9877312183380127, "loss": 0.4809, "nll_loss": 0.4607613682746887, "rewards/accuracies": 0.875, "rewards/chosen": -0.16643808782100677, "rewards/margins": 0.23233504593372345, "rewards/rejected": -0.3987731337547302, "step": 5824 }, { "epoch": 15.94798083504449, "grad_norm": 3.3486135005950928, "learning_rate": 2.0205479452054795e-07, "log_odds_chosen": 4.8427019119262695, "log_odds_ratio": -0.1337878257036209, "logits/chosen": 0.9340107440948486, "logits/rejected": 0.8878028392791748, "logps/chosen": -1.6973248720169067, "logps/rejected": -6.288185119628906, "loss": 0.4828, "nll_loss": 0.46942198276519775, "rewards/accuracies": 1.0, "rewards/chosen": -0.1697324961423874, "rewards/margins": 0.4590860605239868, "rewards/rejected": -0.6288185119628906, "step": 5825 }, { "epoch": 15.950718685831623, "grad_norm": 8.926475524902344, "learning_rate": 2.0191780821917805e-07, "log_odds_chosen": 1.8463492393493652, "log_odds_ratio": -0.5829865336418152, "logits/chosen": 0.9110692739486694, "logits/rejected": 0.9353170394897461, "logps/chosen": -2.12386417388916, "logps/rejected": -3.8502440452575684, "loss": 0.7013, "nll_loss": 0.6430255174636841, "rewards/accuracies": 0.875, "rewards/chosen": -0.21238641440868378, "rewards/margins": 0.1726379692554474, "rewards/rejected": -0.38502439856529236, "step": 5826 }, { "epoch": 15.953456536618754, "grad_norm": 3.509526252746582, "learning_rate": 2.017808219178082e-07, "log_odds_chosen": 3.3765809535980225, "log_odds_ratio": -0.05753143131732941, "logits/chosen": 1.151789903640747, "logits/rejected": 1.1902854442596436, "logps/chosen": -1.9963372945785522, "logps/rejected": -5.1512017250061035, "loss": 0.5649, "nll_loss": 0.5591645240783691, "rewards/accuracies": 1.0, "rewards/chosen": -0.19963371753692627, "rewards/margins": 0.31548643112182617, "rewards/rejected": -0.5151201486587524, "step": 5827 }, { "epoch": 15.956194387405887, "grad_norm": 3.485297203063965, "learning_rate": 2.0164383561643835e-07, "log_odds_chosen": 3.9608380794525146, "log_odds_ratio": -0.20122823119163513, "logits/chosen": 1.138805627822876, "logits/rejected": 1.1329103708267212, "logps/chosen": -1.685960054397583, "logps/rejected": -5.430456638336182, "loss": 0.5028, "nll_loss": 0.4827173054218292, "rewards/accuracies": 1.0, "rewards/chosen": -0.1685960292816162, "rewards/margins": 0.3744497001171112, "rewards/rejected": -0.543045699596405, "step": 5828 }, { "epoch": 15.958932238193018, "grad_norm": 5.805508613586426, "learning_rate": 2.0150684931506848e-07, "log_odds_chosen": 2.2123823165893555, "log_odds_ratio": -0.27894049882888794, "logits/chosen": 0.897963285446167, "logits/rejected": 0.8813048601150513, "logps/chosen": -2.1009938716888428, "logps/rejected": -4.214825630187988, "loss": 0.5132, "nll_loss": 0.48532867431640625, "rewards/accuracies": 1.0, "rewards/chosen": -0.21009939908981323, "rewards/margins": 0.21138320863246918, "rewards/rejected": -0.4214825928211212, "step": 5829 }, { "epoch": 15.961670088980151, "grad_norm": 4.984151363372803, "learning_rate": 2.0136986301369863e-07, "log_odds_chosen": 2.450451135635376, "log_odds_ratio": -0.18313409388065338, "logits/chosen": 0.8909534811973572, "logits/rejected": 0.714704155921936, "logps/chosen": -1.5414661169052124, "logps/rejected": -3.7734570503234863, "loss": 0.5668, "nll_loss": 0.5484517216682434, "rewards/accuracies": 1.0, "rewards/chosen": -0.15414661169052124, "rewards/margins": 0.22319909930229187, "rewards/rejected": -0.3773457109928131, "step": 5830 }, { "epoch": 15.964407939767282, "grad_norm": 3.7358813285827637, "learning_rate": 2.0123287671232875e-07, "log_odds_chosen": 2.06833815574646, "log_odds_ratio": -0.2145727574825287, "logits/chosen": 1.0109024047851562, "logits/rejected": 1.0118176937103271, "logps/chosen": -1.8280181884765625, "logps/rejected": -3.7149810791015625, "loss": 0.524, "nll_loss": 0.5025357007980347, "rewards/accuracies": 1.0, "rewards/chosen": -0.18280181288719177, "rewards/margins": 0.18869632482528687, "rewards/rejected": -0.37149813771247864, "step": 5831 }, { "epoch": 15.967145790554415, "grad_norm": 5.186190128326416, "learning_rate": 2.010958904109589e-07, "log_odds_chosen": 3.07539701461792, "log_odds_ratio": -0.17681027948856354, "logits/chosen": 1.1082268953323364, "logits/rejected": 1.1972678899765015, "logps/chosen": -2.341893196105957, "logps/rejected": -5.29404354095459, "loss": 0.6349, "nll_loss": 0.6172623634338379, "rewards/accuracies": 1.0, "rewards/chosen": -0.23418933153152466, "rewards/margins": 0.29521504044532776, "rewards/rejected": -0.52940434217453, "step": 5832 }, { "epoch": 15.969883641341546, "grad_norm": 5.47518253326416, "learning_rate": 2.0095890410958903e-07, "log_odds_chosen": 2.7261533737182617, "log_odds_ratio": -0.21818098425865173, "logits/chosen": 0.8790783286094666, "logits/rejected": 0.804398775100708, "logps/chosen": -2.0167770385742188, "logps/rejected": -4.5725417137146, "loss": 0.5677, "nll_loss": 0.545926034450531, "rewards/accuracies": 0.875, "rewards/chosen": -0.20167770981788635, "rewards/margins": 0.255576491355896, "rewards/rejected": -0.45725423097610474, "step": 5833 }, { "epoch": 15.97262149212868, "grad_norm": 4.228787422180176, "learning_rate": 2.0082191780821916e-07, "log_odds_chosen": 1.494828701019287, "log_odds_ratio": -0.31196174025535583, "logits/chosen": 0.8245130777359009, "logits/rejected": 0.8263695240020752, "logps/chosen": -1.9383400678634644, "logps/rejected": -3.3051860332489014, "loss": 0.5178, "nll_loss": 0.4866082966327667, "rewards/accuracies": 1.0, "rewards/chosen": -0.19383400678634644, "rewards/margins": 0.1366845965385437, "rewards/rejected": -0.33051860332489014, "step": 5834 }, { "epoch": 15.97535934291581, "grad_norm": 3.6092708110809326, "learning_rate": 2.006849315068493e-07, "log_odds_chosen": 2.3906219005584717, "log_odds_ratio": -0.2909599840641022, "logits/chosen": 1.2285082340240479, "logits/rejected": 1.2124004364013672, "logps/chosen": -1.3350199460983276, "logps/rejected": -3.437263011932373, "loss": 0.5268, "nll_loss": 0.49775391817092896, "rewards/accuracies": 0.875, "rewards/chosen": -0.13350199162960052, "rewards/margins": 0.21022431552410126, "rewards/rejected": -0.3437263071537018, "step": 5835 }, { "epoch": 15.978097193702943, "grad_norm": 3.836310386657715, "learning_rate": 2.0054794520547944e-07, "log_odds_chosen": 2.693432331085205, "log_odds_ratio": -0.31428706645965576, "logits/chosen": 0.9706356525421143, "logits/rejected": 0.9936810731887817, "logps/chosen": -2.0765719413757324, "logps/rejected": -4.556890487670898, "loss": 0.6179, "nll_loss": 0.5864661931991577, "rewards/accuracies": 0.75, "rewards/chosen": -0.20765718817710876, "rewards/margins": 0.2480318695306778, "rewards/rejected": -0.45568907260894775, "step": 5836 }, { "epoch": 15.980835044490075, "grad_norm": 11.466731071472168, "learning_rate": 2.004109589041096e-07, "log_odds_chosen": 1.4883372783660889, "log_odds_ratio": -0.36552077531814575, "logits/chosen": 1.1327695846557617, "logits/rejected": 1.081094741821289, "logps/chosen": -2.0627400875091553, "logps/rejected": -3.3791747093200684, "loss": 0.5351, "nll_loss": 0.4985552132129669, "rewards/accuracies": 0.875, "rewards/chosen": -0.20627400279045105, "rewards/margins": 0.13164347410202026, "rewards/rejected": -0.3379175066947937, "step": 5837 }, { "epoch": 15.983572895277208, "grad_norm": 3.920883893966675, "learning_rate": 2.0027397260273971e-07, "log_odds_chosen": 2.322925329208374, "log_odds_ratio": -0.2230665683746338, "logits/chosen": 0.8929812908172607, "logits/rejected": 0.9289931058883667, "logps/chosen": -2.0605885982513428, "logps/rejected": -4.247958660125732, "loss": 0.573, "nll_loss": 0.5507359504699707, "rewards/accuracies": 1.0, "rewards/chosen": -0.20605885982513428, "rewards/margins": 0.21873697638511658, "rewards/rejected": -0.42479586601257324, "step": 5838 }, { "epoch": 15.986310746064339, "grad_norm": 3.652886152267456, "learning_rate": 2.0013698630136987e-07, "log_odds_chosen": 2.311854839324951, "log_odds_ratio": -0.17187446355819702, "logits/chosen": 0.799870491027832, "logits/rejected": 0.7685721516609192, "logps/chosen": -2.072618007659912, "logps/rejected": -4.197668552398682, "loss": 0.4387, "nll_loss": 0.42152169346809387, "rewards/accuracies": 1.0, "rewards/chosen": -0.2072618007659912, "rewards/margins": 0.212505042552948, "rewards/rejected": -0.4197668433189392, "step": 5839 }, { "epoch": 15.989048596851472, "grad_norm": 3.8741869926452637, "learning_rate": 2e-07, "log_odds_chosen": 1.8613331317901611, "log_odds_ratio": -0.22551769018173218, "logits/chosen": 1.0640404224395752, "logits/rejected": 1.0237224102020264, "logps/chosen": -1.631831407546997, "logps/rejected": -3.282958984375, "loss": 0.5097, "nll_loss": 0.4871373772621155, "rewards/accuracies": 1.0, "rewards/chosen": -0.16318315267562866, "rewards/margins": 0.16511276364326477, "rewards/rejected": -0.32829588651657104, "step": 5840 }, { "epoch": 15.991786447638603, "grad_norm": 4.4309797286987305, "learning_rate": 1.9986301369863012e-07, "log_odds_chosen": 1.8244264125823975, "log_odds_ratio": -0.29260551929473877, "logits/chosen": 1.002114176750183, "logits/rejected": 1.0277094841003418, "logps/chosen": -2.1691973209381104, "logps/rejected": -3.890425682067871, "loss": 0.6021, "nll_loss": 0.5728818774223328, "rewards/accuracies": 0.875, "rewards/chosen": -0.21691972017288208, "rewards/margins": 0.17212283611297607, "rewards/rejected": -0.38904255628585815, "step": 5841 }, { "epoch": 15.994524298425736, "grad_norm": 4.038403034210205, "learning_rate": 1.9972602739726027e-07, "log_odds_chosen": 3.4894375801086426, "log_odds_ratio": -0.09628145396709442, "logits/chosen": 1.1749299764633179, "logits/rejected": 1.2430239915847778, "logps/chosen": -1.7588086128234863, "logps/rejected": -5.027218818664551, "loss": 0.6408, "nll_loss": 0.6311511993408203, "rewards/accuracies": 1.0, "rewards/chosen": -0.17588084936141968, "rewards/margins": 0.3268410563468933, "rewards/rejected": -0.502721905708313, "step": 5842 }, { "epoch": 15.997262149212869, "grad_norm": 3.8913934230804443, "learning_rate": 1.995890410958904e-07, "log_odds_chosen": 2.7729413509368896, "log_odds_ratio": -0.25141119956970215, "logits/chosen": 1.0050830841064453, "logits/rejected": 0.9919157028198242, "logps/chosen": -2.2607169151306152, "logps/rejected": -4.901657581329346, "loss": 0.5655, "nll_loss": 0.5403953194618225, "rewards/accuracies": 1.0, "rewards/chosen": -0.22607168555259705, "rewards/margins": 0.2640940546989441, "rewards/rejected": -0.4901657700538635, "step": 5843 }, { "epoch": 16.0, "grad_norm": 4.189545631408691, "learning_rate": 1.9945205479452055e-07, "log_odds_chosen": 2.430323362350464, "log_odds_ratio": -0.35967642068862915, "logits/chosen": 0.8053796887397766, "logits/rejected": 0.7718747854232788, "logps/chosen": -1.8833211660385132, "logps/rejected": -4.175013542175293, "loss": 0.5586, "nll_loss": 0.5226417779922485, "rewards/accuracies": 0.875, "rewards/chosen": -0.18833211064338684, "rewards/margins": 0.22916929423809052, "rewards/rejected": -0.41750138998031616, "step": 5844 }, { "epoch": 16.00273785078713, "grad_norm": 3.503502368927002, "learning_rate": 1.9931506849315067e-07, "log_odds_chosen": 2.1466970443725586, "log_odds_ratio": -0.2630821466445923, "logits/chosen": 1.282818078994751, "logits/rejected": 1.191064715385437, "logps/chosen": -1.5519752502441406, "logps/rejected": -3.531888008117676, "loss": 0.4353, "nll_loss": 0.4090036451816559, "rewards/accuracies": 1.0, "rewards/chosen": -0.15519753098487854, "rewards/margins": 0.1979912519454956, "rewards/rejected": -0.35318878293037415, "step": 5845 }, { "epoch": 16.005475701574266, "grad_norm": 7.103723526000977, "learning_rate": 1.991780821917808e-07, "log_odds_chosen": 2.7344870567321777, "log_odds_ratio": -0.42585861682891846, "logits/chosen": 1.507961392402649, "logits/rejected": 1.4768437147140503, "logps/chosen": -2.2158100605010986, "logps/rejected": -4.839322566986084, "loss": 0.5104, "nll_loss": 0.46781355142593384, "rewards/accuracies": 0.875, "rewards/chosen": -0.22158102691173553, "rewards/margins": 0.26235127449035645, "rewards/rejected": -0.4839322865009308, "step": 5846 }, { "epoch": 16.008213552361397, "grad_norm": 6.314413070678711, "learning_rate": 1.9904109589041095e-07, "log_odds_chosen": 1.180261254310608, "log_odds_ratio": -0.4676213264465332, "logits/chosen": 0.9466537237167358, "logits/rejected": 0.8913711905479431, "logps/chosen": -1.8710957765579224, "logps/rejected": -2.8530712127685547, "loss": 0.4588, "nll_loss": 0.41206011176109314, "rewards/accuracies": 0.75, "rewards/chosen": -0.1871095895767212, "rewards/margins": 0.09819753468036652, "rewards/rejected": -0.2853071093559265, "step": 5847 }, { "epoch": 16.01095140314853, "grad_norm": 3.2443630695343018, "learning_rate": 1.9890410958904108e-07, "log_odds_chosen": 1.797321081161499, "log_odds_ratio": -0.1848762482404709, "logits/chosen": 1.0858453512191772, "logits/rejected": 1.0566980838775635, "logps/chosen": -1.5324373245239258, "logps/rejected": -3.053626298904419, "loss": 0.5531, "nll_loss": 0.5346213579177856, "rewards/accuracies": 1.0, "rewards/chosen": -0.15324372053146362, "rewards/margins": 0.15211889147758484, "rewards/rejected": -0.30536261200904846, "step": 5848 }, { "epoch": 16.01368925393566, "grad_norm": 3.1919851303100586, "learning_rate": 1.9876712328767123e-07, "log_odds_chosen": 3.7786951065063477, "log_odds_ratio": -0.16464313864707947, "logits/chosen": 1.2049565315246582, "logits/rejected": 1.1977944374084473, "logps/chosen": -1.6987687349319458, "logps/rejected": -5.307036399841309, "loss": 0.5111, "nll_loss": 0.4946061968803406, "rewards/accuracies": 1.0, "rewards/chosen": -0.16987687349319458, "rewards/margins": 0.3608267903327942, "rewards/rejected": -0.5307036638259888, "step": 5849 }, { "epoch": 16.016427104722794, "grad_norm": 4.359983921051025, "learning_rate": 1.9863013698630135e-07, "log_odds_chosen": 2.8538782596588135, "log_odds_ratio": -0.28103265166282654, "logits/chosen": 1.2785990238189697, "logits/rejected": 1.2857176065444946, "logps/chosen": -1.498271107673645, "logps/rejected": -3.991105079650879, "loss": 0.4554, "nll_loss": 0.427314817905426, "rewards/accuracies": 0.875, "rewards/chosen": -0.14982709288597107, "rewards/margins": 0.24928340315818787, "rewards/rejected": -0.3991105258464813, "step": 5850 }, { "epoch": 16.019164955509925, "grad_norm": 3.7252590656280518, "learning_rate": 1.984931506849315e-07, "log_odds_chosen": 4.180121898651123, "log_odds_ratio": -0.06530199944972992, "logits/chosen": 1.1552623510360718, "logits/rejected": 1.2014625072479248, "logps/chosen": -2.2173733711242676, "logps/rejected": -6.204701900482178, "loss": 0.6399, "nll_loss": 0.633366584777832, "rewards/accuracies": 1.0, "rewards/chosen": -0.221737340092659, "rewards/margins": 0.39873284101486206, "rewards/rejected": -0.6204701662063599, "step": 5851 }, { "epoch": 16.021902806297057, "grad_norm": 4.272519588470459, "learning_rate": 1.9835616438356166e-07, "log_odds_chosen": 1.4627337455749512, "log_odds_ratio": -0.3339962065219879, "logits/chosen": 1.232372760772705, "logits/rejected": 1.187353491783142, "logps/chosen": -1.773103952407837, "logps/rejected": -3.092649459838867, "loss": 0.4672, "nll_loss": 0.4337918162345886, "rewards/accuracies": 0.875, "rewards/chosen": -0.17731040716171265, "rewards/margins": 0.13195453584194183, "rewards/rejected": -0.3092649281024933, "step": 5852 }, { "epoch": 16.024640657084188, "grad_norm": 4.140407562255859, "learning_rate": 1.9821917808219176e-07, "log_odds_chosen": 2.6271615028381348, "log_odds_ratio": -0.13831940293312073, "logits/chosen": 1.0735942125320435, "logits/rejected": 1.022095799446106, "logps/chosen": -1.6102774143218994, "logps/rejected": -4.006483554840088, "loss": 0.447, "nll_loss": 0.4331449866294861, "rewards/accuracies": 1.0, "rewards/chosen": -0.16102774441242218, "rewards/margins": 0.239620640873909, "rewards/rejected": -0.4006483852863312, "step": 5853 }, { "epoch": 16.027378507871322, "grad_norm": 3.9399425983428955, "learning_rate": 1.980821917808219e-07, "log_odds_chosen": 4.261867523193359, "log_odds_ratio": -0.09404219686985016, "logits/chosen": 0.8961118459701538, "logits/rejected": 0.8302274346351624, "logps/chosen": -1.2923693656921387, "logps/rejected": -5.247941970825195, "loss": 0.4756, "nll_loss": 0.46618467569351196, "rewards/accuracies": 1.0, "rewards/chosen": -0.12923693656921387, "rewards/margins": 0.3955572843551636, "rewards/rejected": -0.5247942209243774, "step": 5854 }, { "epoch": 16.030116358658454, "grad_norm": 5.501074314117432, "learning_rate": 1.9794520547945203e-07, "log_odds_chosen": 2.28360652923584, "log_odds_ratio": -0.35455137491226196, "logits/chosen": 0.859102725982666, "logits/rejected": 0.8734627366065979, "logps/chosen": -1.9775761365890503, "logps/rejected": -4.037636756896973, "loss": 0.5712, "nll_loss": 0.5357706546783447, "rewards/accuracies": 0.875, "rewards/chosen": -0.19775761663913727, "rewards/margins": 0.20600606501102448, "rewards/rejected": -0.40376368165016174, "step": 5855 }, { "epoch": 16.032854209445585, "grad_norm": 3.6411075592041016, "learning_rate": 1.9780821917808219e-07, "log_odds_chosen": 2.7622668743133545, "log_odds_ratio": -0.31537875533103943, "logits/chosen": 0.924170732498169, "logits/rejected": 0.9250798225402832, "logps/chosen": -1.4849121570587158, "logps/rejected": -3.9713850021362305, "loss": 0.5065, "nll_loss": 0.4749562740325928, "rewards/accuracies": 0.875, "rewards/chosen": -0.14849120378494263, "rewards/margins": 0.2486473023891449, "rewards/rejected": -0.3971385061740875, "step": 5856 }, { "epoch": 16.035592060232716, "grad_norm": 6.1522626876831055, "learning_rate": 1.976712328767123e-07, "log_odds_chosen": 3.3672468662261963, "log_odds_ratio": -0.15901556611061096, "logits/chosen": 1.2701880931854248, "logits/rejected": 1.2933489084243774, "logps/chosen": -2.5643084049224854, "logps/rejected": -5.805663108825684, "loss": 0.5534, "nll_loss": 0.5374737977981567, "rewards/accuracies": 1.0, "rewards/chosen": -0.25643083453178406, "rewards/margins": 0.3241354823112488, "rewards/rejected": -0.5805663466453552, "step": 5857 }, { "epoch": 16.03832991101985, "grad_norm": 4.282410621643066, "learning_rate": 1.9753424657534246e-07, "log_odds_chosen": 3.195467472076416, "log_odds_ratio": -0.11006936430931091, "logits/chosen": 1.1933934688568115, "logits/rejected": 1.2499141693115234, "logps/chosen": -2.215895175933838, "logps/rejected": -5.290914535522461, "loss": 0.6607, "nll_loss": 0.6496888399124146, "rewards/accuracies": 1.0, "rewards/chosen": -0.22158953547477722, "rewards/margins": 0.3075019121170044, "rewards/rejected": -0.529091477394104, "step": 5858 }, { "epoch": 16.041067761806982, "grad_norm": 6.442115306854248, "learning_rate": 1.9739726027397262e-07, "log_odds_chosen": 5.477596282958984, "log_odds_ratio": -0.09349608421325684, "logits/chosen": 1.2240540981292725, "logits/rejected": 1.1563068628311157, "logps/chosen": -1.7224071025848389, "logps/rejected": -6.759758472442627, "loss": 0.5907, "nll_loss": 0.5813119411468506, "rewards/accuracies": 1.0, "rewards/chosen": -0.1722407042980194, "rewards/margins": 0.5037351250648499, "rewards/rejected": -0.6759758591651917, "step": 5859 }, { "epoch": 16.043805612594113, "grad_norm": 4.307287216186523, "learning_rate": 1.9726027397260271e-07, "log_odds_chosen": 3.8027966022491455, "log_odds_ratio": -0.1393216848373413, "logits/chosen": 1.0308719873428345, "logits/rejected": 1.0365756750106812, "logps/chosen": -2.1932950019836426, "logps/rejected": -5.845634937286377, "loss": 0.6455, "nll_loss": 0.6316139101982117, "rewards/accuracies": 1.0, "rewards/chosen": -0.21932952105998993, "rewards/margins": 0.3652339577674866, "rewards/rejected": -0.5845634937286377, "step": 5860 }, { "epoch": 16.046543463381244, "grad_norm": 4.009319305419922, "learning_rate": 1.9712328767123287e-07, "log_odds_chosen": 2.7407195568084717, "log_odds_ratio": -0.20927532017230988, "logits/chosen": 0.8932303786277771, "logits/rejected": 0.9341167211532593, "logps/chosen": -2.2221288681030273, "logps/rejected": -4.852268218994141, "loss": 0.5904, "nll_loss": 0.5694425106048584, "rewards/accuracies": 1.0, "rewards/chosen": -0.22221288084983826, "rewards/margins": 0.26301392912864685, "rewards/rejected": -0.4852268099784851, "step": 5861 }, { "epoch": 16.04928131416838, "grad_norm": 4.4384636878967285, "learning_rate": 1.96986301369863e-07, "log_odds_chosen": 2.6587021350860596, "log_odds_ratio": -0.279074490070343, "logits/chosen": 1.0649583339691162, "logits/rejected": 1.020723819732666, "logps/chosen": -1.8641571998596191, "logps/rejected": -4.330655097961426, "loss": 0.5355, "nll_loss": 0.507568895816803, "rewards/accuracies": 1.0, "rewards/chosen": -0.18641573190689087, "rewards/margins": 0.24664980173110962, "rewards/rejected": -0.4330655336380005, "step": 5862 }, { "epoch": 16.05201916495551, "grad_norm": 9.517863273620605, "learning_rate": 1.9684931506849314e-07, "log_odds_chosen": 1.9367655515670776, "log_odds_ratio": -0.3274729251861572, "logits/chosen": 1.1106016635894775, "logits/rejected": 1.0371277332305908, "logps/chosen": -1.5729129314422607, "logps/rejected": -3.211151361465454, "loss": 0.5632, "nll_loss": 0.5304135084152222, "rewards/accuracies": 0.875, "rewards/chosen": -0.15729129314422607, "rewards/margins": 0.16382385790348053, "rewards/rejected": -0.3211151361465454, "step": 5863 }, { "epoch": 16.05475701574264, "grad_norm": 5.848281383514404, "learning_rate": 1.967123287671233e-07, "log_odds_chosen": 3.934859275817871, "log_odds_ratio": -0.4066157042980194, "logits/chosen": 1.2052147388458252, "logits/rejected": 1.199082851409912, "logps/chosen": -2.5202889442443848, "logps/rejected": -6.350490093231201, "loss": 0.7066, "nll_loss": 0.6659850478172302, "rewards/accuracies": 0.875, "rewards/chosen": -0.2520288825035095, "rewards/margins": 0.3830201327800751, "rewards/rejected": -0.6350489854812622, "step": 5864 }, { "epoch": 16.057494866529773, "grad_norm": 4.118653774261475, "learning_rate": 1.9657534246575342e-07, "log_odds_chosen": 3.707366704940796, "log_odds_ratio": -0.13267675042152405, "logits/chosen": 1.011723279953003, "logits/rejected": 1.0007762908935547, "logps/chosen": -1.7552704811096191, "logps/rejected": -5.268754005432129, "loss": 0.6388, "nll_loss": 0.6255424618721008, "rewards/accuracies": 1.0, "rewards/chosen": -0.17552705109119415, "rewards/margins": 0.351348340511322, "rewards/rejected": -0.526875376701355, "step": 5865 }, { "epoch": 16.060232717316907, "grad_norm": 3.4922220706939697, "learning_rate": 1.9643835616438357e-07, "log_odds_chosen": 1.5901026725769043, "log_odds_ratio": -0.4304581880569458, "logits/chosen": 0.9440094232559204, "logits/rejected": 0.9517095685005188, "logps/chosen": -2.1203701496124268, "logps/rejected": -3.532604217529297, "loss": 0.5572, "nll_loss": 0.5141240358352661, "rewards/accuracies": 0.875, "rewards/chosen": -0.21203702688217163, "rewards/margins": 0.14122341573238373, "rewards/rejected": -0.35326042771339417, "step": 5866 }, { "epoch": 16.06297056810404, "grad_norm": 3.7059853076934814, "learning_rate": 1.9630136986301367e-07, "log_odds_chosen": 2.5853123664855957, "log_odds_ratio": -0.21280698478221893, "logits/chosen": 1.1821482181549072, "logits/rejected": 1.1901198625564575, "logps/chosen": -1.9353892803192139, "logps/rejected": -4.374823570251465, "loss": 0.5488, "nll_loss": 0.5274978876113892, "rewards/accuracies": 1.0, "rewards/chosen": -0.19353891909122467, "rewards/margins": 0.24394343793392181, "rewards/rejected": -0.4374823570251465, "step": 5867 }, { "epoch": 16.06570841889117, "grad_norm": 4.134032726287842, "learning_rate": 1.9616438356164383e-07, "log_odds_chosen": 2.6476659774780273, "log_odds_ratio": -0.37876617908477783, "logits/chosen": 1.1114269495010376, "logits/rejected": 1.1126092672348022, "logps/chosen": -2.1666224002838135, "logps/rejected": -4.583295822143555, "loss": 0.5793, "nll_loss": 0.5414130091667175, "rewards/accuracies": 0.875, "rewards/chosen": -0.2166622430086136, "rewards/margins": 0.24166733026504517, "rewards/rejected": -0.45832955837249756, "step": 5868 }, { "epoch": 16.0684462696783, "grad_norm": 3.6113810539245605, "learning_rate": 1.9602739726027395e-07, "log_odds_chosen": 1.9151173830032349, "log_odds_ratio": -0.2996935248374939, "logits/chosen": 0.8585502505302429, "logits/rejected": 0.8748782277107239, "logps/chosen": -1.7745968103408813, "logps/rejected": -3.5709609985351562, "loss": 0.5676, "nll_loss": 0.5376536846160889, "rewards/accuracies": 1.0, "rewards/chosen": -0.1774596869945526, "rewards/margins": 0.17963643372058868, "rewards/rejected": -0.3570961058139801, "step": 5869 }, { "epoch": 16.071184120465436, "grad_norm": 6.588468074798584, "learning_rate": 1.958904109589041e-07, "log_odds_chosen": 4.378442764282227, "log_odds_ratio": -0.19780723750591278, "logits/chosen": 1.0000739097595215, "logits/rejected": 0.9670993089675903, "logps/chosen": -1.889674425125122, "logps/rejected": -6.049915313720703, "loss": 0.574, "nll_loss": 0.5542683601379395, "rewards/accuracies": 0.875, "rewards/chosen": -0.18896745145320892, "rewards/margins": 0.4160241484642029, "rewards/rejected": -0.6049915552139282, "step": 5870 }, { "epoch": 16.073921971252567, "grad_norm": 4.216766357421875, "learning_rate": 1.9575342465753426e-07, "log_odds_chosen": 2.3512213230133057, "log_odds_ratio": -0.21246767044067383, "logits/chosen": 1.054347038269043, "logits/rejected": 1.0724904537200928, "logps/chosen": -1.5295584201812744, "logps/rejected": -3.640451431274414, "loss": 0.6355, "nll_loss": 0.6142235994338989, "rewards/accuracies": 1.0, "rewards/chosen": -0.15295584499835968, "rewards/margins": 0.21108929812908173, "rewards/rejected": -0.3640451431274414, "step": 5871 }, { "epoch": 16.076659822039698, "grad_norm": 4.395900726318359, "learning_rate": 1.9561643835616438e-07, "log_odds_chosen": 1.9543596506118774, "log_odds_ratio": -0.4862765073776245, "logits/chosen": 1.125529408454895, "logits/rejected": 1.0819205045700073, "logps/chosen": -1.8161698579788208, "logps/rejected": -3.634955883026123, "loss": 0.4666, "nll_loss": 0.41796398162841797, "rewards/accuracies": 0.75, "rewards/chosen": -0.18161700665950775, "rewards/margins": 0.18187861144542694, "rewards/rejected": -0.3634955883026123, "step": 5872 }, { "epoch": 16.079397672826833, "grad_norm": 3.504363775253296, "learning_rate": 1.954794520547945e-07, "log_odds_chosen": 4.004830360412598, "log_odds_ratio": -0.16286177933216095, "logits/chosen": 1.1186230182647705, "logits/rejected": 1.111196756362915, "logps/chosen": -1.9530787467956543, "logps/rejected": -5.825667381286621, "loss": 0.5253, "nll_loss": 0.5089649558067322, "rewards/accuracies": 1.0, "rewards/chosen": -0.1953078806400299, "rewards/margins": 0.3872588276863098, "rewards/rejected": -0.5825667381286621, "step": 5873 }, { "epoch": 16.082135523613964, "grad_norm": 3.356701135635376, "learning_rate": 1.9534246575342463e-07, "log_odds_chosen": 4.028660297393799, "log_odds_ratio": -0.17594453692436218, "logits/chosen": 0.981752872467041, "logits/rejected": 1.0127404928207397, "logps/chosen": -2.0530290603637695, "logps/rejected": -5.951486587524414, "loss": 0.4865, "nll_loss": 0.46891701221466064, "rewards/accuracies": 1.0, "rewards/chosen": -0.20530292391777039, "rewards/margins": 0.38984572887420654, "rewards/rejected": -0.5951486229896545, "step": 5874 }, { "epoch": 16.084873374401095, "grad_norm": 4.557773113250732, "learning_rate": 1.9520547945205478e-07, "log_odds_chosen": 1.8210430145263672, "log_odds_ratio": -0.30546966195106506, "logits/chosen": 1.0791581869125366, "logits/rejected": 1.0764482021331787, "logps/chosen": -2.761503219604492, "logps/rejected": -4.480812072753906, "loss": 0.6407, "nll_loss": 0.6101722121238708, "rewards/accuracies": 1.0, "rewards/chosen": -0.27615034580230713, "rewards/margins": 0.17193089425563812, "rewards/rejected": -0.44808125495910645, "step": 5875 }, { "epoch": 16.087611225188226, "grad_norm": 3.3456361293792725, "learning_rate": 1.950684931506849e-07, "log_odds_chosen": 2.6550683975219727, "log_odds_ratio": -0.19912993907928467, "logits/chosen": 1.0958869457244873, "logits/rejected": 1.0817090272903442, "logps/chosen": -1.4227514266967773, "logps/rejected": -3.813504695892334, "loss": 0.5217, "nll_loss": 0.5018197894096375, "rewards/accuracies": 1.0, "rewards/chosen": -0.1422751545906067, "rewards/margins": 0.23907536268234253, "rewards/rejected": -0.3813505172729492, "step": 5876 }, { "epoch": 16.09034907597536, "grad_norm": 6.055799961090088, "learning_rate": 1.9493150684931506e-07, "log_odds_chosen": 1.7183642387390137, "log_odds_ratio": -0.3781346082687378, "logits/chosen": 1.2383902072906494, "logits/rejected": 1.252089262008667, "logps/chosen": -2.845324993133545, "logps/rejected": -4.442950248718262, "loss": 0.5931, "nll_loss": 0.5552452802658081, "rewards/accuracies": 0.75, "rewards/chosen": -0.28453248739242554, "rewards/margins": 0.15976250171661377, "rewards/rejected": -0.4442950189113617, "step": 5877 }, { "epoch": 16.093086926762492, "grad_norm": 3.7493178844451904, "learning_rate": 1.9479452054794521e-07, "log_odds_chosen": 1.7508008480072021, "log_odds_ratio": -0.2876960337162018, "logits/chosen": 1.2716763019561768, "logits/rejected": 1.2774533033370972, "logps/chosen": -2.3698182106018066, "logps/rejected": -3.986907958984375, "loss": 0.6034, "nll_loss": 0.574618935585022, "rewards/accuracies": 1.0, "rewards/chosen": -0.23698179423809052, "rewards/margins": 0.1617090404033661, "rewards/rejected": -0.3986908197402954, "step": 5878 }, { "epoch": 16.095824777549623, "grad_norm": 4.509490013122559, "learning_rate": 1.9465753424657534e-07, "log_odds_chosen": 2.6850380897521973, "log_odds_ratio": -0.10568656772375107, "logits/chosen": 1.2518888711929321, "logits/rejected": 1.3509594202041626, "logps/chosen": -2.0623321533203125, "logps/rejected": -4.583014965057373, "loss": 0.5776, "nll_loss": 0.5670056939125061, "rewards/accuracies": 1.0, "rewards/chosen": -0.2062332183122635, "rewards/margins": 0.25206831097602844, "rewards/rejected": -0.45830148458480835, "step": 5879 }, { "epoch": 16.098562628336754, "grad_norm": 3.897850275039673, "learning_rate": 1.9452054794520547e-07, "log_odds_chosen": 2.9881691932678223, "log_odds_ratio": -0.17529721558094025, "logits/chosen": 1.1736140251159668, "logits/rejected": 1.1440404653549194, "logps/chosen": -1.4132437705993652, "logps/rejected": -4.079497814178467, "loss": 0.4105, "nll_loss": 0.39295586943626404, "rewards/accuracies": 1.0, "rewards/chosen": -0.14132437109947205, "rewards/margins": 0.26662537455558777, "rewards/rejected": -0.4079497456550598, "step": 5880 }, { "epoch": 16.10130047912389, "grad_norm": 4.311883449554443, "learning_rate": 1.943835616438356e-07, "log_odds_chosen": 2.0867764949798584, "log_odds_ratio": -0.29606667160987854, "logits/chosen": 1.2012337446212769, "logits/rejected": 1.2017977237701416, "logps/chosen": -2.1538751125335693, "logps/rejected": -4.063198089599609, "loss": 0.5395, "nll_loss": 0.5098856687545776, "rewards/accuracies": 0.75, "rewards/chosen": -0.2153875231742859, "rewards/margins": 0.1909322887659073, "rewards/rejected": -0.40631982684135437, "step": 5881 }, { "epoch": 16.10403832991102, "grad_norm": 3.848019599914551, "learning_rate": 1.9424657534246574e-07, "log_odds_chosen": 2.2776103019714355, "log_odds_ratio": -0.1899051070213318, "logits/chosen": 1.1863386631011963, "logits/rejected": 1.0397950410842896, "logps/chosen": -1.3081073760986328, "logps/rejected": -3.344036340713501, "loss": 0.4275, "nll_loss": 0.40854308009147644, "rewards/accuracies": 1.0, "rewards/chosen": -0.13081073760986328, "rewards/margins": 0.20359289646148682, "rewards/rejected": -0.3344036340713501, "step": 5882 }, { "epoch": 16.10677618069815, "grad_norm": 3.843390703201294, "learning_rate": 1.941095890410959e-07, "log_odds_chosen": 4.226863861083984, "log_odds_ratio": -0.12969136238098145, "logits/chosen": 1.2095105648040771, "logits/rejected": 1.163364052772522, "logps/chosen": -1.7681095600128174, "logps/rejected": -5.771669387817383, "loss": 0.4856, "nll_loss": 0.4726736545562744, "rewards/accuracies": 1.0, "rewards/chosen": -0.17681095004081726, "rewards/margins": 0.4003560245037079, "rewards/rejected": -0.5771669745445251, "step": 5883 }, { "epoch": 16.109514031485283, "grad_norm": 4.787437438964844, "learning_rate": 1.9397260273972602e-07, "log_odds_chosen": 3.7537107467651367, "log_odds_ratio": -0.23643311858177185, "logits/chosen": 1.047342300415039, "logits/rejected": 1.0660810470581055, "logps/chosen": -1.8160395622253418, "logps/rejected": -5.381862163543701, "loss": 0.5066, "nll_loss": 0.48292994499206543, "rewards/accuracies": 0.875, "rewards/chosen": -0.18160395324230194, "rewards/margins": 0.35658225417137146, "rewards/rejected": -0.5381861925125122, "step": 5884 }, { "epoch": 16.112251882272417, "grad_norm": 3.988611936569214, "learning_rate": 1.9383561643835617e-07, "log_odds_chosen": 2.0735507011413574, "log_odds_ratio": -0.19631919264793396, "logits/chosen": 1.1578370332717896, "logits/rejected": 1.1845362186431885, "logps/chosen": -1.9047963619232178, "logps/rejected": -3.814462184906006, "loss": 0.5758, "nll_loss": 0.5561320781707764, "rewards/accuracies": 1.0, "rewards/chosen": -0.19047963619232178, "rewards/margins": 0.19096660614013672, "rewards/rejected": -0.3814462423324585, "step": 5885 }, { "epoch": 16.11498973305955, "grad_norm": 3.965719699859619, "learning_rate": 1.936986301369863e-07, "log_odds_chosen": 2.620267868041992, "log_odds_ratio": -0.21162593364715576, "logits/chosen": 1.1231633424758911, "logits/rejected": 1.162318468093872, "logps/chosen": -2.186600685119629, "logps/rejected": -4.598051071166992, "loss": 0.6046, "nll_loss": 0.5833920240402222, "rewards/accuracies": 1.0, "rewards/chosen": -0.21866007149219513, "rewards/margins": 0.24114501476287842, "rewards/rejected": -0.45980510115623474, "step": 5886 }, { "epoch": 16.11772758384668, "grad_norm": 8.517763137817383, "learning_rate": 1.9356164383561642e-07, "log_odds_chosen": 3.2118823528289795, "log_odds_ratio": -0.376267671585083, "logits/chosen": 0.9094631671905518, "logits/rejected": 0.7553443908691406, "logps/chosen": -2.010474681854248, "logps/rejected": -4.915892124176025, "loss": 0.6666, "nll_loss": 0.628984272480011, "rewards/accuracies": 0.875, "rewards/chosen": -0.20104746520519257, "rewards/margins": 0.29054176807403564, "rewards/rejected": -0.4915892481803894, "step": 5887 }, { "epoch": 16.12046543463381, "grad_norm": 4.150144100189209, "learning_rate": 1.9342465753424655e-07, "log_odds_chosen": 1.3800526857376099, "log_odds_ratio": -0.3862123191356659, "logits/chosen": 1.2491060495376587, "logits/rejected": 1.1897752285003662, "logps/chosen": -1.6529808044433594, "logps/rejected": -2.8828330039978027, "loss": 0.4257, "nll_loss": 0.3871237635612488, "rewards/accuracies": 0.75, "rewards/chosen": -0.16529807448387146, "rewards/margins": 0.12298519164323807, "rewards/rejected": -0.2882832884788513, "step": 5888 }, { "epoch": 16.123203285420946, "grad_norm": 7.056586742401123, "learning_rate": 1.932876712328767e-07, "log_odds_chosen": 2.2424144744873047, "log_odds_ratio": -0.3471651077270508, "logits/chosen": 1.0654780864715576, "logits/rejected": 1.0706217288970947, "logps/chosen": -2.5180516242980957, "logps/rejected": -4.648599147796631, "loss": 0.6579, "nll_loss": 0.6231949925422668, "rewards/accuracies": 0.875, "rewards/chosen": -0.2518051564693451, "rewards/margins": 0.21305476129055023, "rewards/rejected": -0.46485990285873413, "step": 5889 }, { "epoch": 16.125941136208077, "grad_norm": 3.585617780685425, "learning_rate": 1.9315068493150685e-07, "log_odds_chosen": 2.4753353595733643, "log_odds_ratio": -0.16718436777591705, "logits/chosen": 1.1875617504119873, "logits/rejected": 1.1898829936981201, "logps/chosen": -1.5830316543579102, "logps/rejected": -3.8232803344726562, "loss": 0.5306, "nll_loss": 0.5138609409332275, "rewards/accuracies": 1.0, "rewards/chosen": -0.1583031713962555, "rewards/margins": 0.22402487695217133, "rewards/rejected": -0.3823280334472656, "step": 5890 }, { "epoch": 16.128678986995208, "grad_norm": 3.6833126544952393, "learning_rate": 1.9301369863013698e-07, "log_odds_chosen": 3.280104160308838, "log_odds_ratio": -0.06567972898483276, "logits/chosen": 1.288061261177063, "logits/rejected": 1.3301496505737305, "logps/chosen": -2.024930477142334, "logps/rejected": -4.991703033447266, "loss": 0.6045, "nll_loss": 0.5979639887809753, "rewards/accuracies": 1.0, "rewards/chosen": -0.20249304175376892, "rewards/margins": 0.29667729139328003, "rewards/rejected": -0.49917030334472656, "step": 5891 }, { "epoch": 16.13141683778234, "grad_norm": 3.436526298522949, "learning_rate": 1.9287671232876713e-07, "log_odds_chosen": 3.7849812507629395, "log_odds_ratio": -0.10316319018602371, "logits/chosen": 1.1367135047912598, "logits/rejected": 1.1627051830291748, "logps/chosen": -1.9356712102890015, "logps/rejected": -5.509203910827637, "loss": 0.5141, "nll_loss": 0.5037434697151184, "rewards/accuracies": 1.0, "rewards/chosen": -0.19356712698936462, "rewards/margins": 0.3573532998561859, "rewards/rejected": -0.5509203672409058, "step": 5892 }, { "epoch": 16.134154688569474, "grad_norm": 3.468500852584839, "learning_rate": 1.9273972602739723e-07, "log_odds_chosen": 4.664595603942871, "log_odds_ratio": -0.09341409802436829, "logits/chosen": 1.1818197965621948, "logits/rejected": 1.2016276121139526, "logps/chosen": -2.001465082168579, "logps/rejected": -6.410919189453125, "loss": 0.5047, "nll_loss": 0.4953541159629822, "rewards/accuracies": 1.0, "rewards/chosen": -0.20014652609825134, "rewards/margins": 0.44094541668891907, "rewards/rejected": -0.6410919427871704, "step": 5893 }, { "epoch": 16.136892539356605, "grad_norm": 3.6832268238067627, "learning_rate": 1.9260273972602738e-07, "log_odds_chosen": 1.7694159746170044, "log_odds_ratio": -0.2100588083267212, "logits/chosen": 1.1908979415893555, "logits/rejected": 1.1949518918991089, "logps/chosen": -2.0050137042999268, "logps/rejected": -3.603069305419922, "loss": 0.5674, "nll_loss": 0.5463977456092834, "rewards/accuracies": 1.0, "rewards/chosen": -0.20050138235092163, "rewards/margins": 0.1598055362701416, "rewards/rejected": -0.36030691862106323, "step": 5894 }, { "epoch": 16.139630390143736, "grad_norm": 3.9297900199890137, "learning_rate": 1.924657534246575e-07, "log_odds_chosen": 2.9113545417785645, "log_odds_ratio": -0.13911773264408112, "logits/chosen": 0.8284415602684021, "logits/rejected": 0.8421028852462769, "logps/chosen": -1.9705908298492432, "logps/rejected": -4.707294940948486, "loss": 0.7244, "nll_loss": 0.7105081677436829, "rewards/accuracies": 1.0, "rewards/chosen": -0.19705908000469208, "rewards/margins": 0.2736704349517822, "rewards/rejected": -0.4707295298576355, "step": 5895 }, { "epoch": 16.142368240930868, "grad_norm": 6.115393161773682, "learning_rate": 1.9232876712328766e-07, "log_odds_chosen": 2.2772774696350098, "log_odds_ratio": -0.27157062292099, "logits/chosen": 1.1909453868865967, "logits/rejected": 1.265978455543518, "logps/chosen": -2.020850658416748, "logps/rejected": -4.058225154876709, "loss": 0.5142, "nll_loss": 0.4870619773864746, "rewards/accuracies": 1.0, "rewards/chosen": -0.20208507776260376, "rewards/margins": 0.20373745262622833, "rewards/rejected": -0.4058225154876709, "step": 5896 }, { "epoch": 16.145106091718002, "grad_norm": 3.9268410205841064, "learning_rate": 1.921917808219178e-07, "log_odds_chosen": 4.395428657531738, "log_odds_ratio": -0.15389156341552734, "logits/chosen": 0.8381372094154358, "logits/rejected": 0.7705063223838806, "logps/chosen": -1.359648585319519, "logps/rejected": -5.497951507568359, "loss": 0.522, "nll_loss": 0.5065828561782837, "rewards/accuracies": 1.0, "rewards/chosen": -0.13596487045288086, "rewards/margins": 0.41383033990859985, "rewards/rejected": -0.5497952103614807, "step": 5897 }, { "epoch": 16.147843942505133, "grad_norm": 4.8121113777160645, "learning_rate": 1.9205479452054794e-07, "log_odds_chosen": 1.7156111001968384, "log_odds_ratio": -0.3301601707935333, "logits/chosen": 0.9612959027290344, "logits/rejected": 0.900544285774231, "logps/chosen": -2.390585422515869, "logps/rejected": -4.024787902832031, "loss": 0.5656, "nll_loss": 0.53254634141922, "rewards/accuracies": 0.875, "rewards/chosen": -0.23905856907367706, "rewards/margins": 0.16342023015022278, "rewards/rejected": -0.40247875452041626, "step": 5898 }, { "epoch": 16.150581793292265, "grad_norm": 7.050097942352295, "learning_rate": 1.919178082191781e-07, "log_odds_chosen": 3.7861955165863037, "log_odds_ratio": -0.4461464285850525, "logits/chosen": 0.8846907615661621, "logits/rejected": 0.9359338283538818, "logps/chosen": -2.4771389961242676, "logps/rejected": -6.1839399337768555, "loss": 0.7355, "nll_loss": 0.6909258961677551, "rewards/accuracies": 0.875, "rewards/chosen": -0.24771392345428467, "rewards/margins": 0.370680034160614, "rewards/rejected": -0.6183940172195435, "step": 5899 }, { "epoch": 16.1533196440794, "grad_norm": 3.4366252422332764, "learning_rate": 1.917808219178082e-07, "log_odds_chosen": 4.000786781311035, "log_odds_ratio": -0.15587373077869415, "logits/chosen": 0.9010128974914551, "logits/rejected": 0.8275662660598755, "logps/chosen": -2.1885204315185547, "logps/rejected": -6.0155158042907715, "loss": 0.568, "nll_loss": 0.5524051189422607, "rewards/accuracies": 1.0, "rewards/chosen": -0.21885204315185547, "rewards/margins": 0.38269948959350586, "rewards/rejected": -0.6015515327453613, "step": 5900 }, { "epoch": 16.15605749486653, "grad_norm": 3.374225616455078, "learning_rate": 1.9164383561643834e-07, "log_odds_chosen": 5.824314594268799, "log_odds_ratio": -0.19569505751132965, "logits/chosen": 1.232673168182373, "logits/rejected": 1.3112444877624512, "logps/chosen": -2.136776924133301, "logps/rejected": -7.856884002685547, "loss": 0.581, "nll_loss": 0.5613898038864136, "rewards/accuracies": 0.875, "rewards/chosen": -0.21367768943309784, "rewards/margins": 0.5720107555389404, "rewards/rejected": -0.7856885194778442, "step": 5901 }, { "epoch": 16.15879534565366, "grad_norm": 3.694833993911743, "learning_rate": 1.915068493150685e-07, "log_odds_chosen": 1.8736746311187744, "log_odds_ratio": -0.29913976788520813, "logits/chosen": 1.0441234111785889, "logits/rejected": 0.9759243726730347, "logps/chosen": -2.1962521076202393, "logps/rejected": -3.809889316558838, "loss": 0.4653, "nll_loss": 0.4353613257408142, "rewards/accuracies": 1.0, "rewards/chosen": -0.21962521970272064, "rewards/margins": 0.16136373579502106, "rewards/rejected": -0.3809889256954193, "step": 5902 }, { "epoch": 16.161533196440793, "grad_norm": 7.938816547393799, "learning_rate": 1.9136986301369862e-07, "log_odds_chosen": 2.1632702350616455, "log_odds_ratio": -0.3638947904109955, "logits/chosen": 1.2802549600601196, "logits/rejected": 1.3154767751693726, "logps/chosen": -2.115278720855713, "logps/rejected": -4.039948463439941, "loss": 0.5392, "nll_loss": 0.5028596520423889, "rewards/accuracies": 0.875, "rewards/chosen": -0.21152788400650024, "rewards/margins": 0.19246697425842285, "rewards/rejected": -0.4039948582649231, "step": 5903 }, { "epoch": 16.164271047227928, "grad_norm": 3.0478858947753906, "learning_rate": 1.9123287671232877e-07, "log_odds_chosen": 3.0859224796295166, "log_odds_ratio": -0.26075559854507446, "logits/chosen": 1.1944506168365479, "logits/rejected": 1.1582387685775757, "logps/chosen": -1.6842910051345825, "logps/rejected": -4.636218547821045, "loss": 0.527, "nll_loss": 0.50089031457901, "rewards/accuracies": 1.0, "rewards/chosen": -0.16842910647392273, "rewards/margins": 0.29519277811050415, "rewards/rejected": -0.4636218547821045, "step": 5904 }, { "epoch": 16.16700889801506, "grad_norm": 12.511518478393555, "learning_rate": 1.910958904109589e-07, "log_odds_chosen": 1.5293421745300293, "log_odds_ratio": -0.3984162211418152, "logits/chosen": 1.4495651721954346, "logits/rejected": 1.3988062143325806, "logps/chosen": -2.0324788093566895, "logps/rejected": -3.4005649089813232, "loss": 0.4828, "nll_loss": 0.4429285526275635, "rewards/accuracies": 0.875, "rewards/chosen": -0.20324790477752686, "rewards/margins": 0.1368086040019989, "rewards/rejected": -0.34005650877952576, "step": 5905 }, { "epoch": 16.16974674880219, "grad_norm": 3.4910502433776855, "learning_rate": 1.9095890410958905e-07, "log_odds_chosen": 3.6921303272247314, "log_odds_ratio": -0.08492570370435715, "logits/chosen": 1.1307564973831177, "logits/rejected": 1.165829062461853, "logps/chosen": -2.2025058269500732, "logps/rejected": -5.753901958465576, "loss": 0.5795, "nll_loss": 0.5709638595581055, "rewards/accuracies": 1.0, "rewards/chosen": -0.22025059163570404, "rewards/margins": 0.3551395833492279, "rewards/rejected": -0.5753902196884155, "step": 5906 }, { "epoch": 16.17248459958932, "grad_norm": 8.295188903808594, "learning_rate": 1.9082191780821915e-07, "log_odds_chosen": 2.4270153045654297, "log_odds_ratio": -0.24839404225349426, "logits/chosen": 0.9745186567306519, "logits/rejected": 0.9610077738761902, "logps/chosen": -2.723823308944702, "logps/rejected": -5.009159088134766, "loss": 0.6379, "nll_loss": 0.6131010055541992, "rewards/accuracies": 0.875, "rewards/chosen": -0.27238231897354126, "rewards/margins": 0.22853362560272217, "rewards/rejected": -0.5009158849716187, "step": 5907 }, { "epoch": 16.175222450376456, "grad_norm": 3.471069097518921, "learning_rate": 1.906849315068493e-07, "log_odds_chosen": 4.699624538421631, "log_odds_ratio": -0.17931701242923737, "logits/chosen": 1.1419260501861572, "logits/rejected": 1.1791131496429443, "logps/chosen": -2.6506950855255127, "logps/rejected": -7.25181770324707, "loss": 0.5979, "nll_loss": 0.5799410343170166, "rewards/accuracies": 0.875, "rewards/chosen": -0.26506951451301575, "rewards/margins": 0.46011224389076233, "rewards/rejected": -0.7251817584037781, "step": 5908 }, { "epoch": 16.177960301163587, "grad_norm": 4.218281269073486, "learning_rate": 1.9054794520547945e-07, "log_odds_chosen": 1.7234597206115723, "log_odds_ratio": -0.29198938608169556, "logits/chosen": 1.3695799112319946, "logits/rejected": 1.3360774517059326, "logps/chosen": -2.0664308071136475, "logps/rejected": -3.630093812942505, "loss": 0.482, "nll_loss": 0.45277470350265503, "rewards/accuracies": 1.0, "rewards/chosen": -0.20664305984973907, "rewards/margins": 0.15636634826660156, "rewards/rejected": -0.36300939321517944, "step": 5909 }, { "epoch": 16.18069815195072, "grad_norm": 3.569406270980835, "learning_rate": 1.9041095890410958e-07, "log_odds_chosen": 3.400455951690674, "log_odds_ratio": -0.2881668508052826, "logits/chosen": 1.025043249130249, "logits/rejected": 1.0567946434020996, "logps/chosen": -1.8681626319885254, "logps/rejected": -5.095256805419922, "loss": 0.6661, "nll_loss": 0.6372654438018799, "rewards/accuracies": 0.75, "rewards/chosen": -0.1868162453174591, "rewards/margins": 0.3227093815803528, "rewards/rejected": -0.5095256567001343, "step": 5910 }, { "epoch": 16.18343600273785, "grad_norm": 3.6643893718719482, "learning_rate": 1.9027397260273973e-07, "log_odds_chosen": 2.0358266830444336, "log_odds_ratio": -0.41453737020492554, "logits/chosen": 0.9744808077812195, "logits/rejected": 0.9388955235481262, "logps/chosen": -1.693540334701538, "logps/rejected": -3.605184316635132, "loss": 0.5208, "nll_loss": 0.47932273149490356, "rewards/accuracies": 0.875, "rewards/chosen": -0.16935403645038605, "rewards/margins": 0.19116440415382385, "rewards/rejected": -0.3605184257030487, "step": 5911 }, { "epoch": 16.186173853524984, "grad_norm": 3.0799264907836914, "learning_rate": 1.9013698630136986e-07, "log_odds_chosen": 2.9619531631469727, "log_odds_ratio": -0.16672185063362122, "logits/chosen": 1.067133903503418, "logits/rejected": 0.9852683544158936, "logps/chosen": -1.4177711009979248, "logps/rejected": -4.141732692718506, "loss": 0.4639, "nll_loss": 0.44725143909454346, "rewards/accuracies": 1.0, "rewards/chosen": -0.14177711308002472, "rewards/margins": 0.27239614725112915, "rewards/rejected": -0.41417330503463745, "step": 5912 }, { "epoch": 16.188911704312115, "grad_norm": 4.027320384979248, "learning_rate": 1.8999999999999998e-07, "log_odds_chosen": 4.218904972076416, "log_odds_ratio": -0.11133003234863281, "logits/chosen": 1.1480437517166138, "logits/rejected": 1.1027804613113403, "logps/chosen": -1.349722981452942, "logps/rejected": -5.26181697845459, "loss": 0.5814, "nll_loss": 0.5702487230300903, "rewards/accuracies": 1.0, "rewards/chosen": -0.13497230410575867, "rewards/margins": 0.3912093937397003, "rewards/rejected": -0.526181697845459, "step": 5913 }, { "epoch": 16.191649555099247, "grad_norm": 4.032249927520752, "learning_rate": 1.898630136986301e-07, "log_odds_chosen": 3.05696964263916, "log_odds_ratio": -0.19403570890426636, "logits/chosen": 1.020675778388977, "logits/rejected": 1.002237319946289, "logps/chosen": -2.0581657886505127, "logps/rejected": -4.895355224609375, "loss": 0.6743, "nll_loss": 0.6548926830291748, "rewards/accuracies": 1.0, "rewards/chosen": -0.2058165967464447, "rewards/margins": 0.28371894359588623, "rewards/rejected": -0.48953554034233093, "step": 5914 }, { "epoch": 16.194387405886378, "grad_norm": 3.5253148078918457, "learning_rate": 1.8972602739726026e-07, "log_odds_chosen": 3.689429998397827, "log_odds_ratio": -0.2108800709247589, "logits/chosen": 1.0508548021316528, "logits/rejected": 1.1036311388015747, "logps/chosen": -2.0040087699890137, "logps/rejected": -5.546289443969727, "loss": 0.7094, "nll_loss": 0.6882634162902832, "rewards/accuracies": 1.0, "rewards/chosen": -0.20040085911750793, "rewards/margins": 0.35422807931900024, "rewards/rejected": -0.5546289682388306, "step": 5915 }, { "epoch": 16.197125256673512, "grad_norm": 3.7822279930114746, "learning_rate": 1.895890410958904e-07, "log_odds_chosen": 2.4302117824554443, "log_odds_ratio": -0.17007051408290863, "logits/chosen": 1.1481760740280151, "logits/rejected": 1.0877594947814941, "logps/chosen": -1.7009530067443848, "logps/rejected": -3.9210963249206543, "loss": 0.4565, "nll_loss": 0.4395325481891632, "rewards/accuracies": 1.0, "rewards/chosen": -0.1700953096151352, "rewards/margins": 0.22201436758041382, "rewards/rejected": -0.3921096920967102, "step": 5916 }, { "epoch": 16.199863107460644, "grad_norm": 4.643996715545654, "learning_rate": 1.8945205479452054e-07, "log_odds_chosen": 3.1664345264434814, "log_odds_ratio": -0.3015317916870117, "logits/chosen": 1.2031434774398804, "logits/rejected": 1.2795909643173218, "logps/chosen": -2.526765823364258, "logps/rejected": -5.580397605895996, "loss": 0.6271, "nll_loss": 0.5969101190567017, "rewards/accuracies": 0.875, "rewards/chosen": -0.2526766061782837, "rewards/margins": 0.30536311864852905, "rewards/rejected": -0.5580397248268127, "step": 5917 }, { "epoch": 16.202600958247775, "grad_norm": 3.716369152069092, "learning_rate": 1.893150684931507e-07, "log_odds_chosen": 3.854763984680176, "log_odds_ratio": -0.09336203336715698, "logits/chosen": 1.1645933389663696, "logits/rejected": 1.2221133708953857, "logps/chosen": -2.5689802169799805, "logps/rejected": -6.348866939544678, "loss": 0.5981, "nll_loss": 0.588782012462616, "rewards/accuracies": 1.0, "rewards/chosen": -0.25689804553985596, "rewards/margins": 0.37798863649368286, "rewards/rejected": -0.6348866820335388, "step": 5918 }, { "epoch": 16.205338809034906, "grad_norm": 7.617286205291748, "learning_rate": 1.8917808219178081e-07, "log_odds_chosen": 2.797053098678589, "log_odds_ratio": -0.6007990837097168, "logits/chosen": 1.3479115962982178, "logits/rejected": 1.3540650606155396, "logps/chosen": -2.489377021789551, "logps/rejected": -5.168981552124023, "loss": 0.6846, "nll_loss": 0.6245086193084717, "rewards/accuracies": 0.875, "rewards/chosen": -0.2489376962184906, "rewards/margins": 0.26796045899391174, "rewards/rejected": -0.5168981552124023, "step": 5919 }, { "epoch": 16.20807665982204, "grad_norm": 3.6824116706848145, "learning_rate": 1.8904109589041094e-07, "log_odds_chosen": 2.6224324703216553, "log_odds_ratio": -0.17910915613174438, "logits/chosen": 1.003630518913269, "logits/rejected": 0.9433215260505676, "logps/chosen": -1.4249733686447144, "logps/rejected": -3.831998109817505, "loss": 0.5151, "nll_loss": 0.4971885681152344, "rewards/accuracies": 1.0, "rewards/chosen": -0.14249733090400696, "rewards/margins": 0.24070246517658234, "rewards/rejected": -0.3831998109817505, "step": 5920 }, { "epoch": 16.210814510609172, "grad_norm": 3.3609371185302734, "learning_rate": 1.889041095890411e-07, "log_odds_chosen": 3.583306074142456, "log_odds_ratio": -0.11048992723226547, "logits/chosen": 1.065693736076355, "logits/rejected": 1.0286927223205566, "logps/chosen": -1.6570351123809814, "logps/rejected": -5.040090084075928, "loss": 0.4885, "nll_loss": 0.477437287569046, "rewards/accuracies": 1.0, "rewards/chosen": -0.16570350527763367, "rewards/margins": 0.3383055031299591, "rewards/rejected": -0.5040090084075928, "step": 5921 }, { "epoch": 16.213552361396303, "grad_norm": 13.228583335876465, "learning_rate": 1.8876712328767122e-07, "log_odds_chosen": 2.5070993900299072, "log_odds_ratio": -0.3847714364528656, "logits/chosen": 1.0477676391601562, "logits/rejected": 1.1106911897659302, "logps/chosen": -2.52079439163208, "logps/rejected": -4.799817085266113, "loss": 0.7535, "nll_loss": 0.7150241136550903, "rewards/accuracies": 0.875, "rewards/chosen": -0.25207948684692383, "rewards/margins": 0.22790224850177765, "rewards/rejected": -0.4799816906452179, "step": 5922 }, { "epoch": 16.216290212183434, "grad_norm": 3.7112247943878174, "learning_rate": 1.8863013698630137e-07, "log_odds_chosen": 2.3713486194610596, "log_odds_ratio": -0.3476652503013611, "logits/chosen": 0.8263949155807495, "logits/rejected": 0.7852658629417419, "logps/chosen": -1.6061298847198486, "logps/rejected": -3.8130998611450195, "loss": 0.5099, "nll_loss": 0.47514206171035767, "rewards/accuracies": 0.875, "rewards/chosen": -0.16061298549175262, "rewards/margins": 0.2206970453262329, "rewards/rejected": -0.38131001591682434, "step": 5923 }, { "epoch": 16.21902806297057, "grad_norm": 3.69738507270813, "learning_rate": 1.884931506849315e-07, "log_odds_chosen": 2.8691301345825195, "log_odds_ratio": -0.17732064425945282, "logits/chosen": 1.0787307024002075, "logits/rejected": 1.029933214187622, "logps/chosen": -2.0349364280700684, "logps/rejected": -4.681131839752197, "loss": 0.4557, "nll_loss": 0.43800681829452515, "rewards/accuracies": 1.0, "rewards/chosen": -0.2034936249256134, "rewards/margins": 0.2646195590496063, "rewards/rejected": -0.4681131839752197, "step": 5924 }, { "epoch": 16.2217659137577, "grad_norm": 3.633399724960327, "learning_rate": 1.8835616438356165e-07, "log_odds_chosen": 1.9110729694366455, "log_odds_ratio": -0.2621006369590759, "logits/chosen": 1.1088366508483887, "logits/rejected": 1.045041561126709, "logps/chosen": -1.675890326499939, "logps/rejected": -3.417922019958496, "loss": 0.485, "nll_loss": 0.45874282717704773, "rewards/accuracies": 1.0, "rewards/chosen": -0.16758903861045837, "rewards/margins": 0.17420315742492676, "rewards/rejected": -0.34179219603538513, "step": 5925 }, { "epoch": 16.22450376454483, "grad_norm": 3.8148694038391113, "learning_rate": 1.8821917808219177e-07, "log_odds_chosen": 2.965123414993286, "log_odds_ratio": -0.14084753394126892, "logits/chosen": 1.041762113571167, "logits/rejected": 1.0422887802124023, "logps/chosen": -1.6668733358383179, "logps/rejected": -4.444942474365234, "loss": 0.549, "nll_loss": 0.5349258184432983, "rewards/accuracies": 1.0, "rewards/chosen": -0.16668733954429626, "rewards/margins": 0.2778069078922272, "rewards/rejected": -0.44449424743652344, "step": 5926 }, { "epoch": 16.227241615331966, "grad_norm": 3.640918016433716, "learning_rate": 1.880821917808219e-07, "log_odds_chosen": 3.9417264461517334, "log_odds_ratio": -0.07078817486763, "logits/chosen": 1.0513360500335693, "logits/rejected": 1.0485337972640991, "logps/chosen": -1.869419813156128, "logps/rejected": -5.578906059265137, "loss": 0.5684, "nll_loss": 0.5613415241241455, "rewards/accuracies": 1.0, "rewards/chosen": -0.1869419813156128, "rewards/margins": 0.3709486722946167, "rewards/rejected": -0.5578906536102295, "step": 5927 }, { "epoch": 16.229979466119097, "grad_norm": 6.150641918182373, "learning_rate": 1.8794520547945205e-07, "log_odds_chosen": 1.9450232982635498, "log_odds_ratio": -0.2550114691257477, "logits/chosen": 0.7513100504875183, "logits/rejected": 0.7127434015274048, "logps/chosen": -2.325068235397339, "logps/rejected": -4.070465087890625, "loss": 0.5015, "nll_loss": 0.47598567605018616, "rewards/accuracies": 1.0, "rewards/chosen": -0.23250684142112732, "rewards/margins": 0.174539715051651, "rewards/rejected": -0.40704652667045593, "step": 5928 }, { "epoch": 16.23271731690623, "grad_norm": 13.392904281616211, "learning_rate": 1.8780821917808218e-07, "log_odds_chosen": 1.0580239295959473, "log_odds_ratio": -0.7942572832107544, "logits/chosen": 1.2415707111358643, "logits/rejected": 1.2649155855178833, "logps/chosen": -3.2883200645446777, "logps/rejected": -4.224077224731445, "loss": 0.7727, "nll_loss": 0.6932825446128845, "rewards/accuracies": 0.75, "rewards/chosen": -0.3288320302963257, "rewards/margins": 0.09357570111751556, "rewards/rejected": -0.42240768671035767, "step": 5929 }, { "epoch": 16.23545516769336, "grad_norm": 3.7347326278686523, "learning_rate": 1.8767123287671233e-07, "log_odds_chosen": 3.3711581230163574, "log_odds_ratio": -0.16347168385982513, "logits/chosen": 1.1142921447753906, "logits/rejected": 1.1044743061065674, "logps/chosen": -1.663217306137085, "logps/rejected": -4.7432541847229, "loss": 0.519, "nll_loss": 0.5026167035102844, "rewards/accuracies": 1.0, "rewards/chosen": -0.16632172465324402, "rewards/margins": 0.308003693819046, "rewards/rejected": -0.4743254482746124, "step": 5930 }, { "epoch": 16.238193018480494, "grad_norm": 4.585845947265625, "learning_rate": 1.8753424657534245e-07, "log_odds_chosen": 2.6998603343963623, "log_odds_ratio": -0.35667896270751953, "logits/chosen": 1.0268442630767822, "logits/rejected": 1.0175666809082031, "logps/chosen": -2.6960182189941406, "logps/rejected": -5.3225579261779785, "loss": 0.7572, "nll_loss": 0.7215168476104736, "rewards/accuracies": 0.875, "rewards/chosen": -0.26960182189941406, "rewards/margins": 0.26265400648117065, "rewards/rejected": -0.5322558283805847, "step": 5931 }, { "epoch": 16.240930869267626, "grad_norm": 3.407888650894165, "learning_rate": 1.873972602739726e-07, "log_odds_chosen": 5.160038471221924, "log_odds_ratio": -0.0692988932132721, "logits/chosen": 0.9317510724067688, "logits/rejected": 0.856475830078125, "logps/chosen": -2.1156232357025146, "logps/rejected": -7.010091781616211, "loss": 0.5674, "nll_loss": 0.5604308843612671, "rewards/accuracies": 1.0, "rewards/chosen": -0.21156233549118042, "rewards/margins": 0.48944687843322754, "rewards/rejected": -0.701009213924408, "step": 5932 }, { "epoch": 16.243668720054757, "grad_norm": 3.831026792526245, "learning_rate": 1.8726027397260276e-07, "log_odds_chosen": 2.2836174964904785, "log_odds_ratio": -0.2509811222553253, "logits/chosen": 0.9713943004608154, "logits/rejected": 0.9933463335037231, "logps/chosen": -1.876800298690796, "logps/rejected": -3.9985389709472656, "loss": 0.4774, "nll_loss": 0.45234882831573486, "rewards/accuracies": 0.875, "rewards/chosen": -0.18768005073070526, "rewards/margins": 0.21217384934425354, "rewards/rejected": -0.3998538851737976, "step": 5933 }, { "epoch": 16.246406570841888, "grad_norm": 3.5446667671203613, "learning_rate": 1.8712328767123286e-07, "log_odds_chosen": 3.2330398559570312, "log_odds_ratio": -0.1538407802581787, "logits/chosen": 0.859649658203125, "logits/rejected": 0.8562377095222473, "logps/chosen": -1.7284551858901978, "logps/rejected": -4.782078742980957, "loss": 0.5709, "nll_loss": 0.5555081367492676, "rewards/accuracies": 1.0, "rewards/chosen": -0.1728455275297165, "rewards/margins": 0.305362343788147, "rewards/rejected": -0.47820785641670227, "step": 5934 }, { "epoch": 16.249144421629023, "grad_norm": 4.056419372558594, "learning_rate": 1.86986301369863e-07, "log_odds_chosen": 3.028688907623291, "log_odds_ratio": -0.2333022952079773, "logits/chosen": 1.045743465423584, "logits/rejected": 1.086464762687683, "logps/chosen": -2.287936210632324, "logps/rejected": -5.1769609451293945, "loss": 0.5842, "nll_loss": 0.5609043836593628, "rewards/accuracies": 1.0, "rewards/chosen": -0.22879362106323242, "rewards/margins": 0.2889024615287781, "rewards/rejected": -0.5176961421966553, "step": 5935 }, { "epoch": 16.251882272416154, "grad_norm": 4.490408420562744, "learning_rate": 1.8684931506849313e-07, "log_odds_chosen": 2.2761783599853516, "log_odds_ratio": -0.37870174646377563, "logits/chosen": 1.2786165475845337, "logits/rejected": 1.2269572019577026, "logps/chosen": -1.6719610691070557, "logps/rejected": -3.6659536361694336, "loss": 0.5323, "nll_loss": 0.4944112300872803, "rewards/accuracies": 0.75, "rewards/chosen": -0.167196124792099, "rewards/margins": 0.19939923286437988, "rewards/rejected": -0.3665953576564789, "step": 5936 }, { "epoch": 16.254620123203285, "grad_norm": 3.579328775405884, "learning_rate": 1.8671232876712329e-07, "log_odds_chosen": 1.864198923110962, "log_odds_ratio": -0.23146162927150726, "logits/chosen": 1.224334955215454, "logits/rejected": 1.0772348642349243, "logps/chosen": -1.7133541107177734, "logps/rejected": -3.3949995040893555, "loss": 0.4349, "nll_loss": 0.4117286205291748, "rewards/accuracies": 0.875, "rewards/chosen": -0.17133541405200958, "rewards/margins": 0.16816453635692596, "rewards/rejected": -0.33949995040893555, "step": 5937 }, { "epoch": 16.257357973990416, "grad_norm": 3.6275880336761475, "learning_rate": 1.865753424657534e-07, "log_odds_chosen": 3.444646120071411, "log_odds_ratio": -0.10140867531299591, "logits/chosen": 1.097467303276062, "logits/rejected": 1.1991088390350342, "logps/chosen": -1.7833335399627686, "logps/rejected": -5.03469705581665, "loss": 0.5904, "nll_loss": 0.5802192091941833, "rewards/accuracies": 1.0, "rewards/chosen": -0.1783333718776703, "rewards/margins": 0.32513636350631714, "rewards/rejected": -0.503469705581665, "step": 5938 }, { "epoch": 16.26009582477755, "grad_norm": 5.1396284103393555, "learning_rate": 1.8643835616438356e-07, "log_odds_chosen": 1.8954087495803833, "log_odds_ratio": -0.4917139410972595, "logits/chosen": 1.0371737480163574, "logits/rejected": 1.0913233757019043, "logps/chosen": -2.454577684402466, "logps/rejected": -4.276101589202881, "loss": 0.5179, "nll_loss": 0.4687473177909851, "rewards/accuracies": 0.625, "rewards/chosen": -0.2454577386379242, "rewards/margins": 0.1821524202823639, "rewards/rejected": -0.4276101589202881, "step": 5939 }, { "epoch": 16.262833675564682, "grad_norm": 3.336378574371338, "learning_rate": 1.863013698630137e-07, "log_odds_chosen": 4.756742000579834, "log_odds_ratio": -0.0931553915143013, "logits/chosen": 0.8710430264472961, "logits/rejected": 0.8376909494400024, "logps/chosen": -1.507364273071289, "logps/rejected": -5.883358001708984, "loss": 0.6396, "nll_loss": 0.6302376985549927, "rewards/accuracies": 1.0, "rewards/chosen": -0.15073642134666443, "rewards/margins": 0.4375993609428406, "rewards/rejected": -0.5883358120918274, "step": 5940 }, { "epoch": 16.265571526351813, "grad_norm": 3.57753849029541, "learning_rate": 1.8616438356164382e-07, "log_odds_chosen": 3.916497230529785, "log_odds_ratio": -0.1415424793958664, "logits/chosen": 0.965690016746521, "logits/rejected": 1.0166441202163696, "logps/chosen": -2.0798792839050293, "logps/rejected": -5.705078125, "loss": 0.5518, "nll_loss": 0.5376185774803162, "rewards/accuracies": 1.0, "rewards/chosen": -0.2079879641532898, "rewards/margins": 0.3625198006629944, "rewards/rejected": -0.5705077648162842, "step": 5941 }, { "epoch": 16.268309377138944, "grad_norm": 5.266027927398682, "learning_rate": 1.8602739726027397e-07, "log_odds_chosen": 2.3176910877227783, "log_odds_ratio": -0.20800620317459106, "logits/chosen": 0.8162530064582825, "logits/rejected": 0.7872350215911865, "logps/chosen": -1.7801949977874756, "logps/rejected": -3.791484832763672, "loss": 0.5541, "nll_loss": 0.5332974791526794, "rewards/accuracies": 1.0, "rewards/chosen": -0.17801950871944427, "rewards/margins": 0.2011289894580841, "rewards/rejected": -0.3791485130786896, "step": 5942 }, { "epoch": 16.27104722792608, "grad_norm": 3.4055445194244385, "learning_rate": 1.858904109589041e-07, "log_odds_chosen": 1.5679378509521484, "log_odds_ratio": -0.33228835463523865, "logits/chosen": 1.147693395614624, "logits/rejected": 1.1747610569000244, "logps/chosen": -1.955137014389038, "logps/rejected": -3.3930485248565674, "loss": 0.562, "nll_loss": 0.5287489891052246, "rewards/accuracies": 0.875, "rewards/chosen": -0.19551371037960052, "rewards/margins": 0.14379116892814636, "rewards/rejected": -0.3393048644065857, "step": 5943 }, { "epoch": 16.27378507871321, "grad_norm": 5.736968040466309, "learning_rate": 1.8575342465753425e-07, "log_odds_chosen": 1.200793743133545, "log_odds_ratio": -0.35362765192985535, "logits/chosen": 1.137219786643982, "logits/rejected": 1.0132440328598022, "logps/chosen": -1.986882209777832, "logps/rejected": -3.1030497550964355, "loss": 0.604, "nll_loss": 0.5686800479888916, "rewards/accuracies": 0.875, "rewards/chosen": -0.19868822395801544, "rewards/margins": 0.11161673069000244, "rewards/rejected": -0.3103049695491791, "step": 5944 }, { "epoch": 16.27652292950034, "grad_norm": 3.3092851638793945, "learning_rate": 1.8561643835616437e-07, "log_odds_chosen": 3.513016700744629, "log_odds_ratio": -0.1623792052268982, "logits/chosen": 1.2164417505264282, "logits/rejected": 1.2181587219238281, "logps/chosen": -1.579099416732788, "logps/rejected": -4.856422424316406, "loss": 0.4375, "nll_loss": 0.4212672710418701, "rewards/accuracies": 1.0, "rewards/chosen": -0.15790994465351105, "rewards/margins": 0.32773226499557495, "rewards/rejected": -0.4856422543525696, "step": 5945 }, { "epoch": 16.279260780287473, "grad_norm": 3.5643486976623535, "learning_rate": 1.8547945205479452e-07, "log_odds_chosen": 2.0110831260681152, "log_odds_ratio": -0.27977150678634644, "logits/chosen": 1.183591604232788, "logits/rejected": 1.2095599174499512, "logps/chosen": -1.9375114440917969, "logps/rejected": -3.843815803527832, "loss": 0.5044, "nll_loss": 0.47637882828712463, "rewards/accuracies": 0.875, "rewards/chosen": -0.19375115633010864, "rewards/margins": 0.19063042104244232, "rewards/rejected": -0.38438156247138977, "step": 5946 }, { "epoch": 16.281998631074607, "grad_norm": 3.689845085144043, "learning_rate": 1.8534246575342465e-07, "log_odds_chosen": 3.8622207641601562, "log_odds_ratio": -0.12657177448272705, "logits/chosen": 1.1082215309143066, "logits/rejected": 1.1219302415847778, "logps/chosen": -1.7178220748901367, "logps/rejected": -5.272287845611572, "loss": 0.5318, "nll_loss": 0.5191188454627991, "rewards/accuracies": 1.0, "rewards/chosen": -0.1717822104692459, "rewards/margins": 0.35544654726982117, "rewards/rejected": -0.5272287726402283, "step": 5947 }, { "epoch": 16.28473648186174, "grad_norm": 3.4015259742736816, "learning_rate": 1.8520547945205477e-07, "log_odds_chosen": 2.8512630462646484, "log_odds_ratio": -0.10898198187351227, "logits/chosen": 1.191849708557129, "logits/rejected": 1.1561760902404785, "logps/chosen": -1.7626876831054688, "logps/rejected": -4.404489040374756, "loss": 0.4785, "nll_loss": 0.46761617064476013, "rewards/accuracies": 1.0, "rewards/chosen": -0.17626875638961792, "rewards/margins": 0.26418015360832214, "rewards/rejected": -0.44044890999794006, "step": 5948 }, { "epoch": 16.28747433264887, "grad_norm": 3.3678808212280273, "learning_rate": 1.8506849315068493e-07, "log_odds_chosen": 3.756488084793091, "log_odds_ratio": -0.19165796041488647, "logits/chosen": 1.1794939041137695, "logits/rejected": 1.2132432460784912, "logps/chosen": -1.5510687828063965, "logps/rejected": -5.102416038513184, "loss": 0.4549, "nll_loss": 0.4357372522354126, "rewards/accuracies": 1.0, "rewards/chosen": -0.15510688722133636, "rewards/margins": 0.3551347255706787, "rewards/rejected": -0.5102416276931763, "step": 5949 }, { "epoch": 16.290212183436005, "grad_norm": 4.17952299118042, "learning_rate": 1.8493150684931505e-07, "log_odds_chosen": 2.670452833175659, "log_odds_ratio": -0.23659715056419373, "logits/chosen": 0.9854679107666016, "logits/rejected": 1.0251768827438354, "logps/chosen": -1.974228858947754, "logps/rejected": -4.496271133422852, "loss": 0.6213, "nll_loss": 0.5976482629776001, "rewards/accuracies": 1.0, "rewards/chosen": -0.19742289185523987, "rewards/margins": 0.25220420956611633, "rewards/rejected": -0.4496271014213562, "step": 5950 }, { "epoch": 16.292950034223136, "grad_norm": 4.377220630645752, "learning_rate": 1.847945205479452e-07, "log_odds_chosen": 3.990769386291504, "log_odds_ratio": -0.16984635591506958, "logits/chosen": 1.1057288646697998, "logits/rejected": 1.094661831855774, "logps/chosen": -2.6843161582946777, "logps/rejected": -6.575921535491943, "loss": 0.4994, "nll_loss": 0.48243650794029236, "rewards/accuracies": 1.0, "rewards/chosen": -0.2684316039085388, "rewards/margins": 0.38916054368019104, "rewards/rejected": -0.6575921773910522, "step": 5951 }, { "epoch": 16.295687885010267, "grad_norm": 3.4273598194122314, "learning_rate": 1.8465753424657536e-07, "log_odds_chosen": 2.7080078125, "log_odds_ratio": -0.1641276478767395, "logits/chosen": 1.124420404434204, "logits/rejected": 1.083864450454712, "logps/chosen": -1.8686752319335938, "logps/rejected": -4.377642631530762, "loss": 0.6036, "nll_loss": 0.5872235298156738, "rewards/accuracies": 1.0, "rewards/chosen": -0.18686752021312714, "rewards/margins": 0.25089678168296814, "rewards/rejected": -0.4377642869949341, "step": 5952 }, { "epoch": 16.298425735797398, "grad_norm": 3.6379332542419434, "learning_rate": 1.8452054794520546e-07, "log_odds_chosen": 3.096158742904663, "log_odds_ratio": -0.15020065009593964, "logits/chosen": 1.185107946395874, "logits/rejected": 1.1760952472686768, "logps/chosen": -1.413033127784729, "logps/rejected": -4.196898460388184, "loss": 0.6255, "nll_loss": 0.6104707717895508, "rewards/accuracies": 1.0, "rewards/chosen": -0.14130331575870514, "rewards/margins": 0.27838653326034546, "rewards/rejected": -0.4196898341178894, "step": 5953 }, { "epoch": 16.301163586584533, "grad_norm": 8.789440155029297, "learning_rate": 1.843835616438356e-07, "log_odds_chosen": 1.7295750379562378, "log_odds_ratio": -0.24294286966323853, "logits/chosen": 0.9982944130897522, "logits/rejected": 0.9338558912277222, "logps/chosen": -2.388235330581665, "logps/rejected": -4.027592182159424, "loss": 0.5469, "nll_loss": 0.5225862860679626, "rewards/accuracies": 1.0, "rewards/chosen": -0.2388235330581665, "rewards/margins": 0.16393572092056274, "rewards/rejected": -0.40275922417640686, "step": 5954 }, { "epoch": 16.303901437371664, "grad_norm": 3.8391542434692383, "learning_rate": 1.8424657534246573e-07, "log_odds_chosen": 3.4412643909454346, "log_odds_ratio": -0.12198653817176819, "logits/chosen": 1.3598499298095703, "logits/rejected": 1.3405929803848267, "logps/chosen": -1.699237585067749, "logps/rejected": -4.917420387268066, "loss": 0.4659, "nll_loss": 0.4537000060081482, "rewards/accuracies": 1.0, "rewards/chosen": -0.16992376744747162, "rewards/margins": 0.3218182623386383, "rewards/rejected": -0.49174201488494873, "step": 5955 }, { "epoch": 16.306639288158795, "grad_norm": 6.422329902648926, "learning_rate": 1.8410958904109588e-07, "log_odds_chosen": 3.1909356117248535, "log_odds_ratio": -0.3515945374965668, "logits/chosen": 0.8601951599121094, "logits/rejected": 0.7909315228462219, "logps/chosen": -1.6031432151794434, "logps/rejected": -4.454061031341553, "loss": 0.484, "nll_loss": 0.4488818645477295, "rewards/accuracies": 0.875, "rewards/chosen": -0.16031432151794434, "rewards/margins": 0.2850917875766754, "rewards/rejected": -0.44540607929229736, "step": 5956 }, { "epoch": 16.309377138945926, "grad_norm": 3.8909595012664795, "learning_rate": 1.83972602739726e-07, "log_odds_chosen": 1.665454387664795, "log_odds_ratio": -0.3270173966884613, "logits/chosen": 0.8080079555511475, "logits/rejected": 0.7256527543067932, "logps/chosen": -1.8201048374176025, "logps/rejected": -3.3563954830169678, "loss": 0.5154, "nll_loss": 0.48273706436157227, "rewards/accuracies": 1.0, "rewards/chosen": -0.1820104718208313, "rewards/margins": 0.15362907946109772, "rewards/rejected": -0.3356395363807678, "step": 5957 }, { "epoch": 16.31211498973306, "grad_norm": 3.5290114879608154, "learning_rate": 1.8383561643835616e-07, "log_odds_chosen": 4.412073612213135, "log_odds_ratio": -0.055882565677165985, "logits/chosen": 1.19437575340271, "logits/rejected": 1.2727383375167847, "logps/chosen": -2.079524517059326, "logps/rejected": -6.3218607902526855, "loss": 0.5625, "nll_loss": 0.5568691492080688, "rewards/accuracies": 1.0, "rewards/chosen": -0.20795245468616486, "rewards/margins": 0.424233615398407, "rewards/rejected": -0.6321860551834106, "step": 5958 }, { "epoch": 16.314852840520192, "grad_norm": 6.298523902893066, "learning_rate": 1.8369863013698631e-07, "log_odds_chosen": 2.6546568870544434, "log_odds_ratio": -0.3763982951641083, "logits/chosen": 1.163568377494812, "logits/rejected": 1.2337987422943115, "logps/chosen": -2.496349811553955, "logps/rejected": -4.9860734939575195, "loss": 0.611, "nll_loss": 0.5733429789543152, "rewards/accuracies": 0.75, "rewards/chosen": -0.2496349811553955, "rewards/margins": 0.24897237122058868, "rewards/rejected": -0.498607337474823, "step": 5959 }, { "epoch": 16.317590691307323, "grad_norm": 5.46672248840332, "learning_rate": 1.8356164383561641e-07, "log_odds_chosen": 2.973837375640869, "log_odds_ratio": -0.1176815927028656, "logits/chosen": 1.1150661706924438, "logits/rejected": 1.1573094129562378, "logps/chosen": -1.9639575481414795, "logps/rejected": -4.741609573364258, "loss": 0.5296, "nll_loss": 0.5178565382957458, "rewards/accuracies": 1.0, "rewards/chosen": -0.19639573991298676, "rewards/margins": 0.2777652442455292, "rewards/rejected": -0.47416096925735474, "step": 5960 }, { "epoch": 16.320328542094455, "grad_norm": 3.4556052684783936, "learning_rate": 1.8342465753424657e-07, "log_odds_chosen": 2.193891763687134, "log_odds_ratio": -0.1665782928466797, "logits/chosen": 1.3201310634613037, "logits/rejected": 1.276534914970398, "logps/chosen": -1.4619814157485962, "logps/rejected": -3.412310838699341, "loss": 0.4533, "nll_loss": 0.4366380572319031, "rewards/accuracies": 1.0, "rewards/chosen": -0.14619815349578857, "rewards/margins": 0.19503293931484222, "rewards/rejected": -0.341231107711792, "step": 5961 }, { "epoch": 16.32306639288159, "grad_norm": 10.324227333068848, "learning_rate": 1.832876712328767e-07, "log_odds_chosen": 4.22811222076416, "log_odds_ratio": -0.43939051032066345, "logits/chosen": 1.003443956375122, "logits/rejected": 0.9778432250022888, "logps/chosen": -2.745918035507202, "logps/rejected": -6.896317481994629, "loss": 0.6368, "nll_loss": 0.5928848385810852, "rewards/accuracies": 0.75, "rewards/chosen": -0.2745918035507202, "rewards/margins": 0.41503995656967163, "rewards/rejected": -0.6896317005157471, "step": 5962 }, { "epoch": 16.32580424366872, "grad_norm": 7.433298587799072, "learning_rate": 1.8315068493150684e-07, "log_odds_chosen": 2.9362428188323975, "log_odds_ratio": -0.8182114958763123, "logits/chosen": 0.8746466636657715, "logits/rejected": 0.8733327388763428, "logps/chosen": -2.8822548389434814, "logps/rejected": -5.721031665802002, "loss": 0.6706, "nll_loss": 0.5887615084648132, "rewards/accuracies": 0.875, "rewards/chosen": -0.2882254719734192, "rewards/margins": 0.2838777005672455, "rewards/rejected": -0.5721032023429871, "step": 5963 }, { "epoch": 16.32854209445585, "grad_norm": 3.9593393802642822, "learning_rate": 1.8301369863013697e-07, "log_odds_chosen": 7.093767166137695, "log_odds_ratio": -0.018468748778104782, "logits/chosen": 1.1698675155639648, "logits/rejected": 1.2418583631515503, "logps/chosen": -1.7146681547164917, "logps/rejected": -8.463090896606445, "loss": 0.7015, "nll_loss": 0.6996797323226929, "rewards/accuracies": 1.0, "rewards/chosen": -0.17146681249141693, "rewards/margins": 0.6748421788215637, "rewards/rejected": -0.8463090658187866, "step": 5964 }, { "epoch": 16.331279945242983, "grad_norm": 12.45122241973877, "learning_rate": 1.8287671232876712e-07, "log_odds_chosen": 3.998593807220459, "log_odds_ratio": -0.4906933605670929, "logits/chosen": 1.194259762763977, "logits/rejected": 1.216807246208191, "logps/chosen": -2.508270263671875, "logps/rejected": -6.3060526847839355, "loss": 0.7161, "nll_loss": 0.6669934988021851, "rewards/accuracies": 0.625, "rewards/chosen": -0.25082704424858093, "rewards/margins": 0.3797782361507416, "rewards/rejected": -0.6306052803993225, "step": 5965 }, { "epoch": 16.334017796030118, "grad_norm": 3.610840082168579, "learning_rate": 1.8273972602739727e-07, "log_odds_chosen": 2.1904096603393555, "log_odds_ratio": -0.27346980571746826, "logits/chosen": 1.1766103506088257, "logits/rejected": 1.1222997903823853, "logps/chosen": -1.0897725820541382, "logps/rejected": -2.9804635047912598, "loss": 0.404, "nll_loss": 0.3766692578792572, "rewards/accuracies": 0.875, "rewards/chosen": -0.10897725820541382, "rewards/margins": 0.18906913697719574, "rewards/rejected": -0.29804638028144836, "step": 5966 }, { "epoch": 16.33675564681725, "grad_norm": 3.3338794708251953, "learning_rate": 1.8260273972602737e-07, "log_odds_chosen": 3.843883752822876, "log_odds_ratio": -0.12082940340042114, "logits/chosen": 0.9685798287391663, "logits/rejected": 0.9135062098503113, "logps/chosen": -1.984556794166565, "logps/rejected": -5.618923664093018, "loss": 0.536, "nll_loss": 0.5238843560218811, "rewards/accuracies": 1.0, "rewards/chosen": -0.19845567643642426, "rewards/margins": 0.3634366989135742, "rewards/rejected": -0.5618923902511597, "step": 5967 }, { "epoch": 16.33949349760438, "grad_norm": 14.77678108215332, "learning_rate": 1.8246575342465752e-07, "log_odds_chosen": 0.9532142877578735, "log_odds_ratio": -0.5518937110900879, "logits/chosen": 1.0895246267318726, "logits/rejected": 0.9564950466156006, "logps/chosen": -2.6373138427734375, "logps/rejected": -3.5630221366882324, "loss": 0.7459, "nll_loss": 0.6906892657279968, "rewards/accuracies": 0.75, "rewards/chosen": -0.2637313902378082, "rewards/margins": 0.09257084131240845, "rewards/rejected": -0.3563022017478943, "step": 5968 }, { "epoch": 16.34223134839151, "grad_norm": 6.00722599029541, "learning_rate": 1.8232876712328765e-07, "log_odds_chosen": 3.1701340675354004, "log_odds_ratio": -0.18986785411834717, "logits/chosen": 0.9619380831718445, "logits/rejected": 0.9389628171920776, "logps/chosen": -1.8091943264007568, "logps/rejected": -4.734644412994385, "loss": 0.5363, "nll_loss": 0.5172693133354187, "rewards/accuracies": 0.875, "rewards/chosen": -0.18091943860054016, "rewards/margins": 0.29254502058029175, "rewards/rejected": -0.4734644293785095, "step": 5969 }, { "epoch": 16.344969199178646, "grad_norm": 5.999159336090088, "learning_rate": 1.821917808219178e-07, "log_odds_chosen": 2.2492928504943848, "log_odds_ratio": -0.3825366497039795, "logits/chosen": 1.0580568313598633, "logits/rejected": 1.0656801462173462, "logps/chosen": -1.8692419528961182, "logps/rejected": -3.941970109939575, "loss": 0.5723, "nll_loss": 0.5340861678123474, "rewards/accuracies": 0.875, "rewards/chosen": -0.18692420423030853, "rewards/margins": 0.20727284252643585, "rewards/rejected": -0.3941970467567444, "step": 5970 }, { "epoch": 16.347707049965777, "grad_norm": 3.2979848384857178, "learning_rate": 1.8205479452054795e-07, "log_odds_chosen": 3.0908069610595703, "log_odds_ratio": -0.10862579196691513, "logits/chosen": 0.998015284538269, "logits/rejected": 0.9674619436264038, "logps/chosen": -1.7754243612289429, "logps/rejected": -4.6355390548706055, "loss": 0.4969, "nll_loss": 0.4860748052597046, "rewards/accuracies": 1.0, "rewards/chosen": -0.17754243314266205, "rewards/margins": 0.2860115170478821, "rewards/rejected": -0.4635539650917053, "step": 5971 }, { "epoch": 16.35044490075291, "grad_norm": 3.5541679859161377, "learning_rate": 1.8191780821917808e-07, "log_odds_chosen": 3.478959083557129, "log_odds_ratio": -0.1590443253517151, "logits/chosen": 0.7863279581069946, "logits/rejected": 0.8148301839828491, "logps/chosen": -1.9813337326049805, "logps/rejected": -5.2705817222595215, "loss": 0.66, "nll_loss": 0.6440773010253906, "rewards/accuracies": 1.0, "rewards/chosen": -0.19813337922096252, "rewards/margins": 0.3289247751235962, "rewards/rejected": -0.5270581841468811, "step": 5972 }, { "epoch": 16.35318275154004, "grad_norm": 4.12202262878418, "learning_rate": 1.8178082191780823e-07, "log_odds_chosen": 2.8896899223327637, "log_odds_ratio": -0.3340989351272583, "logits/chosen": 0.7622983455657959, "logits/rejected": 0.7324825525283813, "logps/chosen": -1.60858952999115, "logps/rejected": -4.276598930358887, "loss": 0.5782, "nll_loss": 0.544808566570282, "rewards/accuracies": 0.75, "rewards/chosen": -0.16085895895957947, "rewards/margins": 0.2668009102344513, "rewards/rejected": -0.42765989899635315, "step": 5973 }, { "epoch": 16.355920602327174, "grad_norm": 4.030570983886719, "learning_rate": 1.8164383561643833e-07, "log_odds_chosen": 2.9512734413146973, "log_odds_ratio": -0.16154545545578003, "logits/chosen": 1.2067549228668213, "logits/rejected": 1.2287262678146362, "logps/chosen": -1.7125955820083618, "logps/rejected": -4.265758037567139, "loss": 0.5101, "nll_loss": 0.49396073818206787, "rewards/accuracies": 1.0, "rewards/chosen": -0.1712595820426941, "rewards/margins": 0.25531625747680664, "rewards/rejected": -0.42657580971717834, "step": 5974 }, { "epoch": 16.358658453114305, "grad_norm": 3.6409053802490234, "learning_rate": 1.8150684931506848e-07, "log_odds_chosen": 4.886093616485596, "log_odds_ratio": -0.05857878923416138, "logits/chosen": 1.3233264684677124, "logits/rejected": 1.351047396659851, "logps/chosen": -2.1084728240966797, "logps/rejected": -6.825165748596191, "loss": 0.4978, "nll_loss": 0.49190929532051086, "rewards/accuracies": 1.0, "rewards/chosen": -0.21084728837013245, "rewards/margins": 0.4716692566871643, "rewards/rejected": -0.6825165748596191, "step": 5975 }, { "epoch": 16.361396303901437, "grad_norm": 4.876438617706299, "learning_rate": 1.813698630136986e-07, "log_odds_chosen": 1.4918256998062134, "log_odds_ratio": -0.3351799249649048, "logits/chosen": 1.0877255201339722, "logits/rejected": 0.9993524551391602, "logps/chosen": -1.3047361373901367, "logps/rejected": -2.534761905670166, "loss": 0.448, "nll_loss": 0.414522647857666, "rewards/accuracies": 0.875, "rewards/chosen": -0.13047361373901367, "rewards/margins": 0.12300258874893188, "rewards/rejected": -0.25347620248794556, "step": 5976 }, { "epoch": 16.36413415468857, "grad_norm": 4.645839214324951, "learning_rate": 1.8123287671232876e-07, "log_odds_chosen": 2.495359420776367, "log_odds_ratio": -0.21530643105506897, "logits/chosen": 1.1013182401657104, "logits/rejected": 1.0686099529266357, "logps/chosen": -1.7200183868408203, "logps/rejected": -4.0438079833984375, "loss": 0.6011, "nll_loss": 0.5795691609382629, "rewards/accuracies": 1.0, "rewards/chosen": -0.17200183868408203, "rewards/margins": 0.2323789894580841, "rewards/rejected": -0.40438079833984375, "step": 5977 }, { "epoch": 16.366872005475702, "grad_norm": 3.432698965072632, "learning_rate": 1.810958904109589e-07, "log_odds_chosen": 2.20339035987854, "log_odds_ratio": -0.2424531877040863, "logits/chosen": 0.8359389901161194, "logits/rejected": 0.8852843046188354, "logps/chosen": -1.5394043922424316, "logps/rejected": -3.4617207050323486, "loss": 0.574, "nll_loss": 0.549765944480896, "rewards/accuracies": 1.0, "rewards/chosen": -0.15394043922424316, "rewards/margins": 0.1922316551208496, "rewards/rejected": -0.3461720645427704, "step": 5978 }, { "epoch": 16.369609856262834, "grad_norm": 3.6323935985565186, "learning_rate": 1.8095890410958904e-07, "log_odds_chosen": 2.1963038444519043, "log_odds_ratio": -0.18261316418647766, "logits/chosen": 1.0261006355285645, "logits/rejected": 1.016650676727295, "logps/chosen": -1.5874767303466797, "logps/rejected": -3.579977035522461, "loss": 0.4764, "nll_loss": 0.45809465646743774, "rewards/accuracies": 1.0, "rewards/chosen": -0.15874767303466797, "rewards/margins": 0.1992500275373459, "rewards/rejected": -0.35799771547317505, "step": 5979 }, { "epoch": 16.372347707049965, "grad_norm": 3.459871292114258, "learning_rate": 1.8082191780821916e-07, "log_odds_chosen": 2.123363971710205, "log_odds_ratio": -0.21476073563098907, "logits/chosen": 0.8716439008712769, "logits/rejected": 0.8551843762397766, "logps/chosen": -1.2564985752105713, "logps/rejected": -3.1385278701782227, "loss": 0.3877, "nll_loss": 0.3662686347961426, "rewards/accuracies": 1.0, "rewards/chosen": -0.1256498545408249, "rewards/margins": 0.18820294737815857, "rewards/rejected": -0.31385278701782227, "step": 5980 }, { "epoch": 16.3750855578371, "grad_norm": 3.9222309589385986, "learning_rate": 1.806849315068493e-07, "log_odds_chosen": 2.921682596206665, "log_odds_ratio": -0.08092649281024933, "logits/chosen": 0.8046322464942932, "logits/rejected": 0.6947861909866333, "logps/chosen": -1.9155865907669067, "logps/rejected": -4.601899147033691, "loss": 0.6013, "nll_loss": 0.5932555198669434, "rewards/accuracies": 1.0, "rewards/chosen": -0.19155865907669067, "rewards/margins": 0.268631249666214, "rewards/rejected": -0.46018993854522705, "step": 5981 }, { "epoch": 16.37782340862423, "grad_norm": 4.230896949768066, "learning_rate": 1.8054794520547944e-07, "log_odds_chosen": 2.888420581817627, "log_odds_ratio": -0.24523398280143738, "logits/chosen": 1.3489984273910522, "logits/rejected": 1.347917079925537, "logps/chosen": -1.8032093048095703, "logps/rejected": -4.455641746520996, "loss": 0.5448, "nll_loss": 0.5202378034591675, "rewards/accuracies": 0.875, "rewards/chosen": -0.18032091856002808, "rewards/margins": 0.2652432322502136, "rewards/rejected": -0.4455641508102417, "step": 5982 }, { "epoch": 16.380561259411362, "grad_norm": 3.612123489379883, "learning_rate": 1.804109589041096e-07, "log_odds_chosen": 3.124264717102051, "log_odds_ratio": -0.2630803883075714, "logits/chosen": 0.8943802118301392, "logits/rejected": 0.9262518286705017, "logps/chosen": -2.007333517074585, "logps/rejected": -4.9769792556762695, "loss": 0.5315, "nll_loss": 0.5051993131637573, "rewards/accuracies": 0.875, "rewards/chosen": -0.20073336362838745, "rewards/margins": 0.2969645857810974, "rewards/rejected": -0.49769794940948486, "step": 5983 }, { "epoch": 16.383299110198493, "grad_norm": 4.490311145782471, "learning_rate": 1.8027397260273972e-07, "log_odds_chosen": 1.708787202835083, "log_odds_ratio": -0.26091718673706055, "logits/chosen": 1.1948471069335938, "logits/rejected": 1.1470376253128052, "logps/chosen": -2.0929923057556152, "logps/rejected": -3.665522813796997, "loss": 0.6099, "nll_loss": 0.5837794542312622, "rewards/accuracies": 1.0, "rewards/chosen": -0.209299236536026, "rewards/margins": 0.15725302696228027, "rewards/rejected": -0.36655229330062866, "step": 5984 }, { "epoch": 16.386036960985628, "grad_norm": 3.4537851810455322, "learning_rate": 1.8013698630136987e-07, "log_odds_chosen": 2.730607748031616, "log_odds_ratio": -0.12230589985847473, "logits/chosen": 0.8691290020942688, "logits/rejected": 0.8094558715820312, "logps/chosen": -1.6649165153503418, "logps/rejected": -4.1553754806518555, "loss": 0.5382, "nll_loss": 0.5259325504302979, "rewards/accuracies": 1.0, "rewards/chosen": -0.16649165749549866, "rewards/margins": 0.24904587864875793, "rewards/rejected": -0.4155375361442566, "step": 5985 }, { "epoch": 16.38877481177276, "grad_norm": 4.172177314758301, "learning_rate": 1.8e-07, "log_odds_chosen": 2.1707913875579834, "log_odds_ratio": -0.17325572669506073, "logits/chosen": 1.1813628673553467, "logits/rejected": 1.2151930332183838, "logps/chosen": -1.9845143556594849, "logps/rejected": -3.9753198623657227, "loss": 0.4578, "nll_loss": 0.4404400885105133, "rewards/accuracies": 1.0, "rewards/chosen": -0.198451429605484, "rewards/margins": 0.19908054172992706, "rewards/rejected": -0.39753201603889465, "step": 5986 }, { "epoch": 16.39151266255989, "grad_norm": 3.5889382362365723, "learning_rate": 1.7986301369863012e-07, "log_odds_chosen": 3.6377058029174805, "log_odds_ratio": -0.11131390184164047, "logits/chosen": 1.2316162586212158, "logits/rejected": 1.2576615810394287, "logps/chosen": -2.0786826610565186, "logps/rejected": -5.522638320922852, "loss": 0.5272, "nll_loss": 0.5160388350486755, "rewards/accuracies": 1.0, "rewards/chosen": -0.20786826312541962, "rewards/margins": 0.34439557790756226, "rewards/rejected": -0.5522638559341431, "step": 5987 }, { "epoch": 16.39425051334702, "grad_norm": 3.696056365966797, "learning_rate": 1.7972602739726025e-07, "log_odds_chosen": 2.2722651958465576, "log_odds_ratio": -0.2706088423728943, "logits/chosen": 0.9840701818466187, "logits/rejected": 0.9435076713562012, "logps/chosen": -2.2948555946350098, "logps/rejected": -4.489466190338135, "loss": 0.5866, "nll_loss": 0.559566855430603, "rewards/accuracies": 0.875, "rewards/chosen": -0.22948557138442993, "rewards/margins": 0.21946106851100922, "rewards/rejected": -0.44894662499427795, "step": 5988 }, { "epoch": 16.396988364134156, "grad_norm": 3.9786102771759033, "learning_rate": 1.795890410958904e-07, "log_odds_chosen": 1.1413187980651855, "log_odds_ratio": -0.3962583541870117, "logits/chosen": 1.1595925092697144, "logits/rejected": 1.1216092109680176, "logps/chosen": -1.501387596130371, "logps/rejected": -2.4737837314605713, "loss": 0.4623, "nll_loss": 0.4226565659046173, "rewards/accuracies": 0.75, "rewards/chosen": -0.1501387655735016, "rewards/margins": 0.09723961353302002, "rewards/rejected": -0.2473783791065216, "step": 5989 }, { "epoch": 16.399726214921287, "grad_norm": 9.10772705078125, "learning_rate": 1.7945205479452055e-07, "log_odds_chosen": 2.503035068511963, "log_odds_ratio": -0.14414054155349731, "logits/chosen": 0.8421819806098938, "logits/rejected": 0.6847640872001648, "logps/chosen": -2.611907958984375, "logps/rejected": -4.94906759262085, "loss": 0.6689, "nll_loss": 0.6544438004493713, "rewards/accuracies": 1.0, "rewards/chosen": -0.261190801858902, "rewards/margins": 0.2337159514427185, "rewards/rejected": -0.49490678310394287, "step": 5990 }, { "epoch": 16.40246406570842, "grad_norm": 4.115574836730957, "learning_rate": 1.7931506849315068e-07, "log_odds_chosen": 2.42022705078125, "log_odds_ratio": -0.23374107480049133, "logits/chosen": 0.8477590680122375, "logits/rejected": 0.8378744125366211, "logps/chosen": -1.5844993591308594, "logps/rejected": -3.7835028171539307, "loss": 0.5173, "nll_loss": 0.4939212501049042, "rewards/accuracies": 0.875, "rewards/chosen": -0.1584499329328537, "rewards/margins": 0.21990032494068146, "rewards/rejected": -0.37835025787353516, "step": 5991 }, { "epoch": 16.40520191649555, "grad_norm": 3.5571177005767822, "learning_rate": 1.7917808219178083e-07, "log_odds_chosen": 2.3132286071777344, "log_odds_ratio": -0.35263240337371826, "logits/chosen": 1.2760785818099976, "logits/rejected": 1.179789662361145, "logps/chosen": -1.2921793460845947, "logps/rejected": -3.393747329711914, "loss": 0.4681, "nll_loss": 0.4328579306602478, "rewards/accuracies": 0.875, "rewards/chosen": -0.1292179375886917, "rewards/margins": 0.21015678346157074, "rewards/rejected": -0.33937472105026245, "step": 5992 }, { "epoch": 16.407939767282684, "grad_norm": 4.440005302429199, "learning_rate": 1.7904109589041093e-07, "log_odds_chosen": 1.1984808444976807, "log_odds_ratio": -0.3094060719013214, "logits/chosen": 1.2194409370422363, "logits/rejected": 1.2056171894073486, "logps/chosen": -2.0533549785614014, "logps/rejected": -3.026793956756592, "loss": 0.474, "nll_loss": 0.4430326819419861, "rewards/accuracies": 1.0, "rewards/chosen": -0.20533552765846252, "rewards/margins": 0.09734384715557098, "rewards/rejected": -0.3026793599128723, "step": 5993 }, { "epoch": 16.410677618069816, "grad_norm": 4.228425025939941, "learning_rate": 1.7890410958904108e-07, "log_odds_chosen": 0.8943349123001099, "log_odds_ratio": -0.4672762155532837, "logits/chosen": 0.7510693669319153, "logits/rejected": 0.6377757787704468, "logps/chosen": -2.0517494678497314, "logps/rejected": -2.8569350242614746, "loss": 0.5539, "nll_loss": 0.5071541666984558, "rewards/accuracies": 0.75, "rewards/chosen": -0.20517495274543762, "rewards/margins": 0.08051855117082596, "rewards/rejected": -0.285693496465683, "step": 5994 }, { "epoch": 16.413415468856947, "grad_norm": 4.121177673339844, "learning_rate": 1.787671232876712e-07, "log_odds_chosen": 3.3446338176727295, "log_odds_ratio": -0.08482630550861359, "logits/chosen": 0.985044538974762, "logits/rejected": 0.9926922917366028, "logps/chosen": -2.2337799072265625, "logps/rejected": -5.426540851593018, "loss": 0.6456, "nll_loss": 0.637118399143219, "rewards/accuracies": 1.0, "rewards/chosen": -0.223377987742424, "rewards/margins": 0.3192760944366455, "rewards/rejected": -0.5426540970802307, "step": 5995 }, { "epoch": 16.416153319644078, "grad_norm": 4.378368854522705, "learning_rate": 1.7863013698630136e-07, "log_odds_chosen": 2.4919843673706055, "log_odds_ratio": -0.22798827290534973, "logits/chosen": 1.1033388376235962, "logits/rejected": 1.089961051940918, "logps/chosen": -2.128079652786255, "logps/rejected": -4.433238983154297, "loss": 0.5586, "nll_loss": 0.5358209609985352, "rewards/accuracies": 1.0, "rewards/chosen": -0.21280795335769653, "rewards/margins": 0.2305159568786621, "rewards/rejected": -0.44332391023635864, "step": 5996 }, { "epoch": 16.418891170431213, "grad_norm": 3.856790065765381, "learning_rate": 1.784931506849315e-07, "log_odds_chosen": 3.3033156394958496, "log_odds_ratio": -0.20218512415885925, "logits/chosen": 0.9229356646537781, "logits/rejected": 0.9101036190986633, "logps/chosen": -1.6884855031967163, "logps/rejected": -4.821579456329346, "loss": 0.5455, "nll_loss": 0.5252885818481445, "rewards/accuracies": 1.0, "rewards/chosen": -0.16884855926036835, "rewards/margins": 0.3133094012737274, "rewards/rejected": -0.48215797543525696, "step": 5997 }, { "epoch": 16.421629021218344, "grad_norm": 3.4342172145843506, "learning_rate": 1.7835616438356164e-07, "log_odds_chosen": 2.994889974594116, "log_odds_ratio": -0.1942553073167801, "logits/chosen": 1.4010131359100342, "logits/rejected": 1.3435784578323364, "logps/chosen": -1.6911616325378418, "logps/rejected": -4.528259754180908, "loss": 0.5932, "nll_loss": 0.5738212466239929, "rewards/accuracies": 0.875, "rewards/chosen": -0.16911615431308746, "rewards/margins": 0.283709853887558, "rewards/rejected": -0.45282602310180664, "step": 5998 }, { "epoch": 16.424366872005475, "grad_norm": 6.480837821960449, "learning_rate": 1.782191780821918e-07, "log_odds_chosen": 2.1135499477386475, "log_odds_ratio": -0.22387517988681793, "logits/chosen": 1.3410135507583618, "logits/rejected": 1.3604766130447388, "logps/chosen": -2.6173181533813477, "logps/rejected": -4.533998489379883, "loss": 0.5486, "nll_loss": 0.5262230038642883, "rewards/accuracies": 0.875, "rewards/chosen": -0.2617318034172058, "rewards/margins": 0.19166803359985352, "rewards/rejected": -0.4533998370170593, "step": 5999 }, { "epoch": 16.427104722792606, "grad_norm": 4.793182849884033, "learning_rate": 1.780821917808219e-07, "log_odds_chosen": 2.162895679473877, "log_odds_ratio": -0.47886955738067627, "logits/chosen": 1.1993921995162964, "logits/rejected": 1.3231713771820068, "logps/chosen": -2.241425037384033, "logps/rejected": -4.281499862670898, "loss": 0.561, "nll_loss": 0.5130795240402222, "rewards/accuracies": 0.875, "rewards/chosen": -0.22414252161979675, "rewards/margins": 0.20400741696357727, "rewards/rejected": -0.428149938583374, "step": 6000 }, { "epoch": 16.42984257357974, "grad_norm": 3.488689422607422, "learning_rate": 1.7794520547945204e-07, "log_odds_chosen": 2.717149496078491, "log_odds_ratio": -0.15493756532669067, "logits/chosen": 1.1020911931991577, "logits/rejected": 1.133234977722168, "logps/chosen": -1.8750005960464478, "logps/rejected": -4.377227783203125, "loss": 0.5495, "nll_loss": 0.5340403914451599, "rewards/accuracies": 1.0, "rewards/chosen": -0.18750007450580597, "rewards/margins": 0.25022274255752563, "rewards/rejected": -0.4377228021621704, "step": 6001 }, { "epoch": 16.432580424366872, "grad_norm": 4.806175708770752, "learning_rate": 1.778082191780822e-07, "log_odds_chosen": 5.837447166442871, "log_odds_ratio": -0.07992403954267502, "logits/chosen": 0.9654848575592041, "logits/rejected": 0.9603263735771179, "logps/chosen": -2.1890740394592285, "logps/rejected": -7.678481101989746, "loss": 0.6632, "nll_loss": 0.6551810503005981, "rewards/accuracies": 1.0, "rewards/chosen": -0.2189074009656906, "rewards/margins": 0.5489407181739807, "rewards/rejected": -0.7678481340408325, "step": 6002 }, { "epoch": 16.435318275154003, "grad_norm": 3.8395915031433105, "learning_rate": 1.7767123287671232e-07, "log_odds_chosen": 3.653574228286743, "log_odds_ratio": -0.3004353940486908, "logits/chosen": 0.9241020083427429, "logits/rejected": 0.9283537864685059, "logps/chosen": -2.3936331272125244, "logps/rejected": -5.936914443969727, "loss": 0.6245, "nll_loss": 0.5944615006446838, "rewards/accuracies": 0.875, "rewards/chosen": -0.23936332762241364, "rewards/margins": 0.35432809591293335, "rewards/rejected": -0.5936914682388306, "step": 6003 }, { "epoch": 16.438056125941138, "grad_norm": 5.172421932220459, "learning_rate": 1.7753424657534247e-07, "log_odds_chosen": 1.8415741920471191, "log_odds_ratio": -0.43240442872047424, "logits/chosen": 1.157953143119812, "logits/rejected": 1.074267864227295, "logps/chosen": -1.5673367977142334, "logps/rejected": -3.194281816482544, "loss": 0.5017, "nll_loss": 0.4584183394908905, "rewards/accuracies": 0.875, "rewards/chosen": -0.1567336916923523, "rewards/margins": 0.16269451379776, "rewards/rejected": -0.3194282054901123, "step": 6004 }, { "epoch": 16.44079397672827, "grad_norm": 4.036550998687744, "learning_rate": 1.773972602739726e-07, "log_odds_chosen": 0.9850307703018188, "log_odds_ratio": -0.45518574118614197, "logits/chosen": 1.0505650043487549, "logits/rejected": 1.0511319637298584, "logps/chosen": -2.5127224922180176, "logps/rejected": -3.3910610675811768, "loss": 0.6508, "nll_loss": 0.6052545309066772, "rewards/accuracies": 0.625, "rewards/chosen": -0.2512722611427307, "rewards/margins": 0.08783388137817383, "rewards/rejected": -0.33910614252090454, "step": 6005 }, { "epoch": 16.4435318275154, "grad_norm": 3.8262131214141846, "learning_rate": 1.7726027397260275e-07, "log_odds_chosen": 2.8798205852508545, "log_odds_ratio": -0.2075178623199463, "logits/chosen": 0.7349827885627747, "logits/rejected": 0.7310587167739868, "logps/chosen": -1.4809004068374634, "logps/rejected": -4.103754043579102, "loss": 0.4624, "nll_loss": 0.44169706106185913, "rewards/accuracies": 1.0, "rewards/chosen": -0.14809003472328186, "rewards/margins": 0.26228535175323486, "rewards/rejected": -0.4103754162788391, "step": 6006 }, { "epoch": 16.44626967830253, "grad_norm": 3.5354268550872803, "learning_rate": 1.7712328767123285e-07, "log_odds_chosen": 2.616887092590332, "log_odds_ratio": -0.1507398784160614, "logits/chosen": 1.4044067859649658, "logits/rejected": 1.4720869064331055, "logps/chosen": -1.9481897354125977, "logps/rejected": -4.42357063293457, "loss": 0.5384, "nll_loss": 0.5233556032180786, "rewards/accuracies": 1.0, "rewards/chosen": -0.19481897354125977, "rewards/margins": 0.24753805994987488, "rewards/rejected": -0.44235703349113464, "step": 6007 }, { "epoch": 16.449007529089666, "grad_norm": 3.725987195968628, "learning_rate": 1.76986301369863e-07, "log_odds_chosen": 3.0925772190093994, "log_odds_ratio": -0.16714568436145782, "logits/chosen": 1.273662805557251, "logits/rejected": 1.281724452972412, "logps/chosen": -1.859771966934204, "logps/rejected": -4.781452655792236, "loss": 0.5124, "nll_loss": 0.49571001529693604, "rewards/accuracies": 1.0, "rewards/chosen": -0.18597720563411713, "rewards/margins": 0.2921680212020874, "rewards/rejected": -0.4781452417373657, "step": 6008 }, { "epoch": 16.451745379876797, "grad_norm": 4.106542110443115, "learning_rate": 1.7684931506849315e-07, "log_odds_chosen": 3.9582319259643555, "log_odds_ratio": -0.1070634201169014, "logits/chosen": 1.0568981170654297, "logits/rejected": 1.0887534618377686, "logps/chosen": -2.13631272315979, "logps/rejected": -5.855452537536621, "loss": 0.6326, "nll_loss": 0.6219198107719421, "rewards/accuracies": 1.0, "rewards/chosen": -0.213631272315979, "rewards/margins": 0.37191396951675415, "rewards/rejected": -0.5855452418327332, "step": 6009 }, { "epoch": 16.45448323066393, "grad_norm": 3.6761467456817627, "learning_rate": 1.7671232876712328e-07, "log_odds_chosen": 1.7137682437896729, "log_odds_ratio": -0.23677542805671692, "logits/chosen": 0.9355732202529907, "logits/rejected": 0.950023353099823, "logps/chosen": -1.6760671138763428, "logps/rejected": -3.204989433288574, "loss": 0.5813, "nll_loss": 0.557600200176239, "rewards/accuracies": 1.0, "rewards/chosen": -0.16760671138763428, "rewards/margins": 0.15289220213890076, "rewards/rejected": -0.3204989433288574, "step": 6010 }, { "epoch": 16.45722108145106, "grad_norm": 3.5581395626068115, "learning_rate": 1.7657534246575343e-07, "log_odds_chosen": 2.5165624618530273, "log_odds_ratio": -0.1757442206144333, "logits/chosen": 1.1803545951843262, "logits/rejected": 1.1526684761047363, "logps/chosen": -1.8101491928100586, "logps/rejected": -4.167384147644043, "loss": 0.552, "nll_loss": 0.5344032645225525, "rewards/accuracies": 1.0, "rewards/chosen": -0.18101494014263153, "rewards/margins": 0.23572349548339844, "rewards/rejected": -0.4167384207248688, "step": 6011 }, { "epoch": 16.459958932238195, "grad_norm": 3.396714687347412, "learning_rate": 1.7643835616438355e-07, "log_odds_chosen": 2.64508318901062, "log_odds_ratio": -0.17076939344406128, "logits/chosen": 1.1465811729431152, "logits/rejected": 1.129684329032898, "logps/chosen": -1.8406705856323242, "logps/rejected": -4.300734519958496, "loss": 0.5098, "nll_loss": 0.4927285313606262, "rewards/accuracies": 1.0, "rewards/chosen": -0.18406707048416138, "rewards/margins": 0.24600641429424286, "rewards/rejected": -0.43007349967956543, "step": 6012 }, { "epoch": 16.462696783025326, "grad_norm": 4.979725360870361, "learning_rate": 1.763013698630137e-07, "log_odds_chosen": 3.1525468826293945, "log_odds_ratio": -0.14039447903633118, "logits/chosen": 1.1707642078399658, "logits/rejected": 1.1693960428237915, "logps/chosen": -2.0436694622039795, "logps/rejected": -4.99468469619751, "loss": 0.5512, "nll_loss": 0.53717440366745, "rewards/accuracies": 1.0, "rewards/chosen": -0.20436695218086243, "rewards/margins": 0.295101523399353, "rewards/rejected": -0.49946844577789307, "step": 6013 }, { "epoch": 16.465434633812457, "grad_norm": 3.820620536804199, "learning_rate": 1.761643835616438e-07, "log_odds_chosen": 5.392053604125977, "log_odds_ratio": -0.10705247521400452, "logits/chosen": 1.1192055940628052, "logits/rejected": 1.1537247896194458, "logps/chosen": -1.6950905323028564, "logps/rejected": -6.822897911071777, "loss": 0.5903, "nll_loss": 0.5795665979385376, "rewards/accuracies": 1.0, "rewards/chosen": -0.16950906813144684, "rewards/margins": 0.5127806663513184, "rewards/rejected": -0.6822897791862488, "step": 6014 }, { "epoch": 16.468172484599588, "grad_norm": 3.3882710933685303, "learning_rate": 1.7602739726027396e-07, "log_odds_chosen": 4.176044464111328, "log_odds_ratio": -0.19779178500175476, "logits/chosen": 1.0656993389129639, "logits/rejected": 0.992117166519165, "logps/chosen": -1.5193195343017578, "logps/rejected": -5.454330921173096, "loss": 0.5625, "nll_loss": 0.5427558422088623, "rewards/accuracies": 1.0, "rewards/chosen": -0.15193195641040802, "rewards/margins": 0.3935011625289917, "rewards/rejected": -0.5454331040382385, "step": 6015 }, { "epoch": 16.470910335386723, "grad_norm": 3.5381557941436768, "learning_rate": 1.758904109589041e-07, "log_odds_chosen": 2.767550468444824, "log_odds_ratio": -0.10159555077552795, "logits/chosen": 1.282138466835022, "logits/rejected": 1.2531992197036743, "logps/chosen": -1.457572102546692, "logps/rejected": -3.923129081726074, "loss": 0.4924, "nll_loss": 0.4822443723678589, "rewards/accuracies": 1.0, "rewards/chosen": -0.14575721323490143, "rewards/margins": 0.24655571579933167, "rewards/rejected": -0.3923129439353943, "step": 6016 }, { "epoch": 16.473648186173854, "grad_norm": 3.6268093585968018, "learning_rate": 1.7575342465753424e-07, "log_odds_chosen": 2.5244107246398926, "log_odds_ratio": -0.24214722216129303, "logits/chosen": 1.2346123456954956, "logits/rejected": 1.2239969968795776, "logps/chosen": -1.8014566898345947, "logps/rejected": -4.184274196624756, "loss": 0.4286, "nll_loss": 0.4043373465538025, "rewards/accuracies": 1.0, "rewards/chosen": -0.18014568090438843, "rewards/margins": 0.2382817566394806, "rewards/rejected": -0.4184274673461914, "step": 6017 }, { "epoch": 16.476386036960985, "grad_norm": 3.988600969314575, "learning_rate": 1.756164383561644e-07, "log_odds_chosen": 3.2467644214630127, "log_odds_ratio": -0.2560029923915863, "logits/chosen": 1.242287516593933, "logits/rejected": 1.2463364601135254, "logps/chosen": -2.3674628734588623, "logps/rejected": -5.475293159484863, "loss": 0.5094, "nll_loss": 0.4838321805000305, "rewards/accuracies": 1.0, "rewards/chosen": -0.23674628138542175, "rewards/margins": 0.3107830286026001, "rewards/rejected": -0.5475293397903442, "step": 6018 }, { "epoch": 16.479123887748116, "grad_norm": 3.6928186416625977, "learning_rate": 1.754794520547945e-07, "log_odds_chosen": 3.3994343280792236, "log_odds_ratio": -0.1842905431985855, "logits/chosen": 0.8631188273429871, "logits/rejected": 0.7938153147697449, "logps/chosen": -1.3509029150009155, "logps/rejected": -4.498926639556885, "loss": 0.4906, "nll_loss": 0.4722042679786682, "rewards/accuracies": 1.0, "rewards/chosen": -0.13509029150009155, "rewards/margins": 0.314802348613739, "rewards/rejected": -0.44989269971847534, "step": 6019 }, { "epoch": 16.48186173853525, "grad_norm": 3.812870740890503, "learning_rate": 1.7534246575342464e-07, "log_odds_chosen": 1.5179557800292969, "log_odds_ratio": -0.2916550636291504, "logits/chosen": 1.2014901638031006, "logits/rejected": 1.2005740404129028, "logps/chosen": -1.7153241634368896, "logps/rejected": -3.1148383617401123, "loss": 0.5527, "nll_loss": 0.5235058665275574, "rewards/accuracies": 0.875, "rewards/chosen": -0.17153242230415344, "rewards/margins": 0.1399514079093933, "rewards/rejected": -0.31148386001586914, "step": 6020 }, { "epoch": 16.484599589322382, "grad_norm": 3.653433322906494, "learning_rate": 1.752054794520548e-07, "log_odds_chosen": 2.423189163208008, "log_odds_ratio": -0.2584803104400635, "logits/chosen": 1.1230392456054688, "logits/rejected": 1.1511867046356201, "logps/chosen": -1.9129970073699951, "logps/rejected": -4.224920749664307, "loss": 0.5299, "nll_loss": 0.5040217638015747, "rewards/accuracies": 1.0, "rewards/chosen": -0.191299706697464, "rewards/margins": 0.23119236528873444, "rewards/rejected": -0.4224920868873596, "step": 6021 }, { "epoch": 16.487337440109513, "grad_norm": 3.228675127029419, "learning_rate": 1.7506849315068492e-07, "log_odds_chosen": 2.8144619464874268, "log_odds_ratio": -0.15580351650714874, "logits/chosen": 0.9963603019714355, "logits/rejected": 0.9601472616195679, "logps/chosen": -1.9425746202468872, "logps/rejected": -4.610930442810059, "loss": 0.5801, "nll_loss": 0.5645188689231873, "rewards/accuracies": 1.0, "rewards/chosen": -0.194257453083992, "rewards/margins": 0.26683560013771057, "rewards/rejected": -0.46109306812286377, "step": 6022 }, { "epoch": 16.490075290896645, "grad_norm": 4.159582614898682, "learning_rate": 1.7493150684931507e-07, "log_odds_chosen": 1.9533932209014893, "log_odds_ratio": -0.22813160717487335, "logits/chosen": 1.1356594562530518, "logits/rejected": 1.0759117603302002, "logps/chosen": -1.8287463188171387, "logps/rejected": -3.5653982162475586, "loss": 0.5545, "nll_loss": 0.5317273139953613, "rewards/accuracies": 1.0, "rewards/chosen": -0.1828746497631073, "rewards/margins": 0.17366519570350647, "rewards/rejected": -0.35653984546661377, "step": 6023 }, { "epoch": 16.49281314168378, "grad_norm": 6.654414176940918, "learning_rate": 1.747945205479452e-07, "log_odds_chosen": 1.304117202758789, "log_odds_ratio": -0.3641406297683716, "logits/chosen": 1.0902513265609741, "logits/rejected": 1.0181760787963867, "logps/chosen": -2.256777763366699, "logps/rejected": -3.4688215255737305, "loss": 0.6751, "nll_loss": 0.6387059092521667, "rewards/accuracies": 0.875, "rewards/chosen": -0.2256777584552765, "rewards/margins": 0.12120439857244492, "rewards/rejected": -0.346882164478302, "step": 6024 }, { "epoch": 16.49555099247091, "grad_norm": 19.25982093811035, "learning_rate": 1.7465753424657535e-07, "log_odds_chosen": 1.3860399723052979, "log_odds_ratio": -0.5769562721252441, "logits/chosen": 1.2733136415481567, "logits/rejected": 1.245030164718628, "logps/chosen": -2.8186421394348145, "logps/rejected": -4.154299736022949, "loss": 0.6687, "nll_loss": 0.611021101474762, "rewards/accuracies": 0.625, "rewards/chosen": -0.2818642258644104, "rewards/margins": 0.1335657238960266, "rewards/rejected": -0.415429949760437, "step": 6025 }, { "epoch": 16.49828884325804, "grad_norm": 3.7805068492889404, "learning_rate": 1.7452054794520547e-07, "log_odds_chosen": 6.470249176025391, "log_odds_ratio": -0.13891640305519104, "logits/chosen": 1.0073050260543823, "logits/rejected": 1.0378395318984985, "logps/chosen": -2.021946430206299, "logps/rejected": -8.339788436889648, "loss": 0.6973, "nll_loss": 0.6833708882331848, "rewards/accuracies": 0.875, "rewards/chosen": -0.20219466090202332, "rewards/margins": 0.631784200668335, "rewards/rejected": -0.8339788913726807, "step": 6026 }, { "epoch": 16.501026694045173, "grad_norm": 3.359830856323242, "learning_rate": 1.743835616438356e-07, "log_odds_chosen": 3.4086878299713135, "log_odds_ratio": -0.16873618960380554, "logits/chosen": 0.8238685131072998, "logits/rejected": 0.7802448272705078, "logps/chosen": -1.590045690536499, "logps/rejected": -4.756364822387695, "loss": 0.5495, "nll_loss": 0.5325955152511597, "rewards/accuracies": 1.0, "rewards/chosen": -0.1590045839548111, "rewards/margins": 0.31663191318511963, "rewards/rejected": -0.4756365120410919, "step": 6027 }, { "epoch": 16.503764544832308, "grad_norm": 4.151253700256348, "learning_rate": 1.7424657534246575e-07, "log_odds_chosen": 4.1928629875183105, "log_odds_ratio": -0.16572873294353485, "logits/chosen": 1.1265708208084106, "logits/rejected": 1.1547458171844482, "logps/chosen": -2.231485366821289, "logps/rejected": -6.272365570068359, "loss": 0.5494, "nll_loss": 0.532842755317688, "rewards/accuracies": 1.0, "rewards/chosen": -0.22314853966236115, "rewards/margins": 0.40408802032470703, "rewards/rejected": -0.6272366046905518, "step": 6028 }, { "epoch": 16.50650239561944, "grad_norm": 3.324768304824829, "learning_rate": 1.7410958904109587e-07, "log_odds_chosen": 1.9218978881835938, "log_odds_ratio": -0.2692034840583801, "logits/chosen": 0.99777752161026, "logits/rejected": 0.9268391132354736, "logps/chosen": -1.7623414993286133, "logps/rejected": -3.5444047451019287, "loss": 0.5062, "nll_loss": 0.47932180762290955, "rewards/accuracies": 0.875, "rewards/chosen": -0.1762341558933258, "rewards/margins": 0.17820632457733154, "rewards/rejected": -0.35444048047065735, "step": 6029 }, { "epoch": 16.50924024640657, "grad_norm": 3.7668445110321045, "learning_rate": 1.7397260273972603e-07, "log_odds_chosen": 2.939666986465454, "log_odds_ratio": -0.16203704476356506, "logits/chosen": 1.2263493537902832, "logits/rejected": 1.2503310441970825, "logps/chosen": -1.270569920539856, "logps/rejected": -3.8752212524414062, "loss": 0.4501, "nll_loss": 0.4339086413383484, "rewards/accuracies": 1.0, "rewards/chosen": -0.1270570009946823, "rewards/margins": 0.26046517491340637, "rewards/rejected": -0.3875221610069275, "step": 6030 }, { "epoch": 16.511978097193705, "grad_norm": 3.470271348953247, "learning_rate": 1.7383561643835615e-07, "log_odds_chosen": 2.775480270385742, "log_odds_ratio": -0.17143596708774567, "logits/chosen": 1.1712838411331177, "logits/rejected": 1.1667760610580444, "logps/chosen": -1.9594210386276245, "logps/rejected": -4.524468421936035, "loss": 0.477, "nll_loss": 0.4598202705383301, "rewards/accuracies": 1.0, "rewards/chosen": -0.19594210386276245, "rewards/margins": 0.25650471448898315, "rewards/rejected": -0.4524468183517456, "step": 6031 }, { "epoch": 16.514715947980836, "grad_norm": 5.146540641784668, "learning_rate": 1.736986301369863e-07, "log_odds_chosen": 1.957237958908081, "log_odds_ratio": -0.3499881327152252, "logits/chosen": 1.2679290771484375, "logits/rejected": 1.2376022338867188, "logps/chosen": -1.6032929420471191, "logps/rejected": -3.277721405029297, "loss": 0.4581, "nll_loss": 0.42311781644821167, "rewards/accuracies": 0.75, "rewards/chosen": -0.16032928228378296, "rewards/margins": 0.16744285821914673, "rewards/rejected": -0.3277721405029297, "step": 6032 }, { "epoch": 16.517453798767967, "grad_norm": 3.4315309524536133, "learning_rate": 1.7356164383561643e-07, "log_odds_chosen": 5.0567307472229, "log_odds_ratio": -0.04546118155121803, "logits/chosen": 1.2716412544250488, "logits/rejected": 1.2902929782867432, "logps/chosen": -2.0877890586853027, "logps/rejected": -6.89219331741333, "loss": 0.5377, "nll_loss": 0.5331294536590576, "rewards/accuracies": 1.0, "rewards/chosen": -0.20877891778945923, "rewards/margins": 0.4804404079914093, "rewards/rejected": -0.6892193555831909, "step": 6033 }, { "epoch": 16.520191649555098, "grad_norm": 12.219003677368164, "learning_rate": 1.7342465753424656e-07, "log_odds_chosen": 2.4743852615356445, "log_odds_ratio": -0.5974072813987732, "logits/chosen": 1.1348336935043335, "logits/rejected": 1.0906133651733398, "logps/chosen": -2.414921522140503, "logps/rejected": -4.69162654876709, "loss": 0.6833, "nll_loss": 0.6235336065292358, "rewards/accuracies": 0.75, "rewards/chosen": -0.2414921522140503, "rewards/margins": 0.22767050564289093, "rewards/rejected": -0.4691626727581024, "step": 6034 }, { "epoch": 16.522929500342233, "grad_norm": 3.6319754123687744, "learning_rate": 1.732876712328767e-07, "log_odds_chosen": 2.254668951034546, "log_odds_ratio": -0.17641550302505493, "logits/chosen": 0.9584379196166992, "logits/rejected": 1.034789800643921, "logps/chosen": -1.753965139389038, "logps/rejected": -3.8251123428344727, "loss": 0.5003, "nll_loss": 0.4826989769935608, "rewards/accuracies": 0.875, "rewards/chosen": -0.17539653182029724, "rewards/margins": 0.20711469650268555, "rewards/rejected": -0.3825112283229828, "step": 6035 }, { "epoch": 16.525667351129364, "grad_norm": 3.824634552001953, "learning_rate": 1.7315068493150683e-07, "log_odds_chosen": 2.461371421813965, "log_odds_ratio": -0.1965012550354004, "logits/chosen": 1.063481330871582, "logits/rejected": 1.0524911880493164, "logps/chosen": -1.7881428003311157, "logps/rejected": -4.056002140045166, "loss": 0.4928, "nll_loss": 0.4731304347515106, "rewards/accuracies": 1.0, "rewards/chosen": -0.17881429195404053, "rewards/margins": 0.22678592801094055, "rewards/rejected": -0.4056002199649811, "step": 6036 }, { "epoch": 16.528405201916495, "grad_norm": 4.294742584228516, "learning_rate": 1.7301369863013699e-07, "log_odds_chosen": 2.8601202964782715, "log_odds_ratio": -0.19247643649578094, "logits/chosen": 1.330958604812622, "logits/rejected": 1.3959898948669434, "logps/chosen": -2.475538969039917, "logps/rejected": -5.213353157043457, "loss": 0.5587, "nll_loss": 0.5394769906997681, "rewards/accuracies": 0.875, "rewards/chosen": -0.24755389988422394, "rewards/margins": 0.273781418800354, "rewards/rejected": -0.5213353037834167, "step": 6037 }, { "epoch": 16.531143052703626, "grad_norm": 7.385919094085693, "learning_rate": 1.728767123287671e-07, "log_odds_chosen": 2.9784274101257324, "log_odds_ratio": -0.21862384676933289, "logits/chosen": 1.0842630863189697, "logits/rejected": 0.9843013286590576, "logps/chosen": -2.3611574172973633, "logps/rejected": -5.201245307922363, "loss": 0.6962, "nll_loss": 0.6743686199188232, "rewards/accuracies": 1.0, "rewards/chosen": -0.2361157387495041, "rewards/margins": 0.28400880098342896, "rewards/rejected": -0.5201245546340942, "step": 6038 }, { "epoch": 16.53388090349076, "grad_norm": 6.34273624420166, "learning_rate": 1.7273972602739726e-07, "log_odds_chosen": 3.2096900939941406, "log_odds_ratio": -0.1452203243970871, "logits/chosen": 1.1746034622192383, "logits/rejected": 1.1798105239868164, "logps/chosen": -1.5097007751464844, "logps/rejected": -4.275964260101318, "loss": 0.5409, "nll_loss": 0.5263653993606567, "rewards/accuracies": 1.0, "rewards/chosen": -0.15097010135650635, "rewards/margins": 0.2766263484954834, "rewards/rejected": -0.42759644985198975, "step": 6039 }, { "epoch": 16.536618754277892, "grad_norm": 6.897154331207275, "learning_rate": 1.7260273972602742e-07, "log_odds_chosen": 3.0226683616638184, "log_odds_ratio": -0.2769874632358551, "logits/chosen": 1.2382452487945557, "logits/rejected": 1.206909418106079, "logps/chosen": -2.7435169219970703, "logps/rejected": -5.632463455200195, "loss": 0.5874, "nll_loss": 0.5597112774848938, "rewards/accuracies": 0.875, "rewards/chosen": -0.27435171604156494, "rewards/margins": 0.2888946831226349, "rewards/rejected": -0.5632463693618774, "step": 6040 }, { "epoch": 16.539356605065024, "grad_norm": 4.861945629119873, "learning_rate": 1.7246575342465751e-07, "log_odds_chosen": 3.678865909576416, "log_odds_ratio": -0.11321327835321426, "logits/chosen": 1.1765543222427368, "logits/rejected": 1.074190378189087, "logps/chosen": -1.6587804555892944, "logps/rejected": -5.104038715362549, "loss": 0.4981, "nll_loss": 0.486796110868454, "rewards/accuracies": 1.0, "rewards/chosen": -0.1658780574798584, "rewards/margins": 0.3445258140563965, "rewards/rejected": -0.5104038715362549, "step": 6041 }, { "epoch": 16.542094455852155, "grad_norm": 8.950685501098633, "learning_rate": 1.7232876712328767e-07, "log_odds_chosen": 1.5330349206924438, "log_odds_ratio": -0.4926016628742218, "logits/chosen": 1.149138331413269, "logits/rejected": 1.1471048593521118, "logps/chosen": -2.5771384239196777, "logps/rejected": -4.018762588500977, "loss": 0.62, "nll_loss": 0.5706929564476013, "rewards/accuracies": 0.75, "rewards/chosen": -0.25771385431289673, "rewards/margins": 0.1441624015569687, "rewards/rejected": -0.4018762707710266, "step": 6042 }, { "epoch": 16.54483230663929, "grad_norm": 3.5148963928222656, "learning_rate": 1.721917808219178e-07, "log_odds_chosen": 2.072422981262207, "log_odds_ratio": -0.18427032232284546, "logits/chosen": 0.9790765047073364, "logits/rejected": 0.8685175180435181, "logps/chosen": -1.404870629310608, "logps/rejected": -3.1329526901245117, "loss": 0.542, "nll_loss": 0.5235381126403809, "rewards/accuracies": 1.0, "rewards/chosen": -0.14048705995082855, "rewards/margins": 0.1728082150220871, "rewards/rejected": -0.31329527497291565, "step": 6043 }, { "epoch": 16.54757015742642, "grad_norm": 3.462562322616577, "learning_rate": 1.7205479452054794e-07, "log_odds_chosen": 2.078763246536255, "log_odds_ratio": -0.1537691056728363, "logits/chosen": 0.9103056192398071, "logits/rejected": 0.8218916058540344, "logps/chosen": -1.4298207759857178, "logps/rejected": -3.244849920272827, "loss": 0.5561, "nll_loss": 0.5407312512397766, "rewards/accuracies": 1.0, "rewards/chosen": -0.14298208057880402, "rewards/margins": 0.18150293827056885, "rewards/rejected": -0.32448500394821167, "step": 6044 }, { "epoch": 16.550308008213552, "grad_norm": 12.329099655151367, "learning_rate": 1.7191780821917807e-07, "log_odds_chosen": 1.850219488143921, "log_odds_ratio": -0.41199997067451477, "logits/chosen": 1.275259017944336, "logits/rejected": 1.273648738861084, "logps/chosen": -1.9588932991027832, "logps/rejected": -3.6749143600463867, "loss": 0.6392, "nll_loss": 0.5979734063148499, "rewards/accuracies": 0.875, "rewards/chosen": -0.19588932394981384, "rewards/margins": 0.17160212993621826, "rewards/rejected": -0.3674914538860321, "step": 6045 }, { "epoch": 16.553045859000683, "grad_norm": 5.469057083129883, "learning_rate": 1.7178082191780822e-07, "log_odds_chosen": 3.253422737121582, "log_odds_ratio": -0.1328350007534027, "logits/chosen": 1.1267976760864258, "logits/rejected": 1.1174181699752808, "logps/chosen": -2.705624580383301, "logps/rejected": -5.821044445037842, "loss": 0.5224, "nll_loss": 0.5091429948806763, "rewards/accuracies": 1.0, "rewards/chosen": -0.27056246995925903, "rewards/margins": 0.31154197454452515, "rewards/rejected": -0.5821044445037842, "step": 6046 }, { "epoch": 16.555783709787818, "grad_norm": 3.502046823501587, "learning_rate": 1.7164383561643835e-07, "log_odds_chosen": 3.2734620571136475, "log_odds_ratio": -0.1309133917093277, "logits/chosen": 1.1828968524932861, "logits/rejected": 1.1779751777648926, "logps/chosen": -1.3715466260910034, "logps/rejected": -4.37276029586792, "loss": 0.5305, "nll_loss": 0.5174583196640015, "rewards/accuracies": 1.0, "rewards/chosen": -0.13715465366840363, "rewards/margins": 0.30012136697769165, "rewards/rejected": -0.43727606534957886, "step": 6047 }, { "epoch": 16.55852156057495, "grad_norm": 5.904938220977783, "learning_rate": 1.7150684931506847e-07, "log_odds_chosen": 1.7204620838165283, "log_odds_ratio": -0.2869074046611786, "logits/chosen": 1.3665797710418701, "logits/rejected": 1.3679076433181763, "logps/chosen": -2.7142412662506104, "logps/rejected": -4.334312438964844, "loss": 0.6646, "nll_loss": 0.635918915271759, "rewards/accuracies": 0.875, "rewards/chosen": -0.2714241147041321, "rewards/margins": 0.16200712323188782, "rewards/rejected": -0.4334312379360199, "step": 6048 }, { "epoch": 16.56125941136208, "grad_norm": 4.053147792816162, "learning_rate": 1.7136986301369863e-07, "log_odds_chosen": 2.9016098976135254, "log_odds_ratio": -0.16391335427761078, "logits/chosen": 1.054287075996399, "logits/rejected": 1.0845749378204346, "logps/chosen": -1.9416073560714722, "logps/rejected": -4.670178413391113, "loss": 0.6161, "nll_loss": 0.5997095108032227, "rewards/accuracies": 1.0, "rewards/chosen": -0.19416074454784393, "rewards/margins": 0.27285706996917725, "rewards/rejected": -0.46701779961586, "step": 6049 }, { "epoch": 16.56399726214921, "grad_norm": 6.69504976272583, "learning_rate": 1.7123287671232875e-07, "log_odds_chosen": 2.5415053367614746, "log_odds_ratio": -0.3241557776927948, "logits/chosen": 1.2301942110061646, "logits/rejected": 1.261506199836731, "logps/chosen": -2.692564010620117, "logps/rejected": -5.111231327056885, "loss": 0.5494, "nll_loss": 0.5170286297798157, "rewards/accuracies": 0.875, "rewards/chosen": -0.2692563831806183, "rewards/margins": 0.24186678230762482, "rewards/rejected": -0.5111231207847595, "step": 6050 }, { "epoch": 16.566735112936346, "grad_norm": 3.893542528152466, "learning_rate": 1.710958904109589e-07, "log_odds_chosen": 3.747796058654785, "log_odds_ratio": -0.19885015487670898, "logits/chosen": 1.1816046237945557, "logits/rejected": 1.195157527923584, "logps/chosen": -2.1447224617004395, "logps/rejected": -5.655932903289795, "loss": 0.5395, "nll_loss": 0.5195962190628052, "rewards/accuracies": 1.0, "rewards/chosen": -0.21447226405143738, "rewards/margins": 0.35112103819847107, "rewards/rejected": -0.5655933022499084, "step": 6051 }, { "epoch": 16.569472963723477, "grad_norm": 4.495175838470459, "learning_rate": 1.7095890410958905e-07, "log_odds_chosen": 2.4237875938415527, "log_odds_ratio": -0.2387002855539322, "logits/chosen": 0.9286860227584839, "logits/rejected": 0.9497108459472656, "logps/chosen": -2.3134377002716064, "logps/rejected": -4.531716823577881, "loss": 0.6551, "nll_loss": 0.6312709450721741, "rewards/accuracies": 1.0, "rewards/chosen": -0.23134377598762512, "rewards/margins": 0.221827894449234, "rewards/rejected": -0.45317167043685913, "step": 6052 }, { "epoch": 16.57221081451061, "grad_norm": 11.316596031188965, "learning_rate": 1.7082191780821918e-07, "log_odds_chosen": 1.5100507736206055, "log_odds_ratio": -0.5066588521003723, "logits/chosen": 1.1650580167770386, "logits/rejected": 1.1434155702590942, "logps/chosen": -2.331509828567505, "logps/rejected": -3.7462711334228516, "loss": 0.5919, "nll_loss": 0.541201651096344, "rewards/accuracies": 0.75, "rewards/chosen": -0.23315097391605377, "rewards/margins": 0.1414761245250702, "rewards/rejected": -0.37462711334228516, "step": 6053 }, { "epoch": 16.57494866529774, "grad_norm": 3.500403642654419, "learning_rate": 1.706849315068493e-07, "log_odds_chosen": 3.409411907196045, "log_odds_ratio": -0.13952866196632385, "logits/chosen": 1.419665813446045, "logits/rejected": 1.3883990049362183, "logps/chosen": -1.7468292713165283, "logps/rejected": -4.929556846618652, "loss": 0.464, "nll_loss": 0.45006221532821655, "rewards/accuracies": 1.0, "rewards/chosen": -0.17468293011188507, "rewards/margins": 0.31827276945114136, "rewards/rejected": -0.49295568466186523, "step": 6054 }, { "epoch": 16.577686516084874, "grad_norm": 4.624992847442627, "learning_rate": 1.7054794520547943e-07, "log_odds_chosen": 1.8665006160736084, "log_odds_ratio": -0.2121415138244629, "logits/chosen": 0.924191415309906, "logits/rejected": 0.8991568088531494, "logps/chosen": -2.3655877113342285, "logps/rejected": -4.089244842529297, "loss": 0.481, "nll_loss": 0.45979103446006775, "rewards/accuracies": 1.0, "rewards/chosen": -0.23655876517295837, "rewards/margins": 0.1723657250404358, "rewards/rejected": -0.40892449021339417, "step": 6055 }, { "epoch": 16.580424366872005, "grad_norm": 4.050854206085205, "learning_rate": 1.7041095890410958e-07, "log_odds_chosen": 1.9075853824615479, "log_odds_ratio": -0.2466822862625122, "logits/chosen": 1.262650966644287, "logits/rejected": 1.2102006673812866, "logps/chosen": -1.3880562782287598, "logps/rejected": -3.0702810287475586, "loss": 0.4768, "nll_loss": 0.45213499665260315, "rewards/accuracies": 1.0, "rewards/chosen": -0.13880561292171478, "rewards/margins": 0.16822248697280884, "rewards/rejected": -0.3070281147956848, "step": 6056 }, { "epoch": 16.583162217659137, "grad_norm": 5.42373514175415, "learning_rate": 1.702739726027397e-07, "log_odds_chosen": 1.681751012802124, "log_odds_ratio": -0.3266613483428955, "logits/chosen": 1.3739585876464844, "logits/rejected": 1.3000284433364868, "logps/chosen": -1.9984040260314941, "logps/rejected": -3.4886417388916016, "loss": 0.5365, "nll_loss": 0.5038154125213623, "rewards/accuracies": 0.875, "rewards/chosen": -0.19984041154384613, "rewards/margins": 0.14902377128601074, "rewards/rejected": -0.3488641679286957, "step": 6057 }, { "epoch": 16.58590006844627, "grad_norm": 3.7589261531829834, "learning_rate": 1.7013698630136986e-07, "log_odds_chosen": 1.1734507083892822, "log_odds_ratio": -0.31498289108276367, "logits/chosen": 0.8100641965866089, "logits/rejected": 0.7271724343299866, "logps/chosen": -1.2994911670684814, "logps/rejected": -2.273578643798828, "loss": 0.5447, "nll_loss": 0.5132427215576172, "rewards/accuracies": 1.0, "rewards/chosen": -0.12994910776615143, "rewards/margins": 0.09740875661373138, "rewards/rejected": -0.2273578643798828, "step": 6058 }, { "epoch": 16.588637919233403, "grad_norm": 4.2813639640808105, "learning_rate": 1.7000000000000001e-07, "log_odds_chosen": 2.9084935188293457, "log_odds_ratio": -0.1657228022813797, "logits/chosen": 1.066957950592041, "logits/rejected": 1.0944008827209473, "logps/chosen": -2.2624282836914062, "logps/rejected": -4.989426136016846, "loss": 0.5284, "nll_loss": 0.5117814540863037, "rewards/accuracies": 1.0, "rewards/chosen": -0.2262428253889084, "rewards/margins": 0.2726998031139374, "rewards/rejected": -0.49894264340400696, "step": 6059 }, { "epoch": 16.591375770020534, "grad_norm": 3.394202709197998, "learning_rate": 1.698630136986301e-07, "log_odds_chosen": 2.9940898418426514, "log_odds_ratio": -0.18161362409591675, "logits/chosen": 0.8157595992088318, "logits/rejected": 0.7776714563369751, "logps/chosen": -1.5750852823257446, "logps/rejected": -4.34403657913208, "loss": 0.4935, "nll_loss": 0.4753241240978241, "rewards/accuracies": 1.0, "rewards/chosen": -0.15750852227210999, "rewards/margins": 0.276895135641098, "rewards/rejected": -0.4344036877155304, "step": 6060 }, { "epoch": 16.594113620807665, "grad_norm": 3.423057794570923, "learning_rate": 1.6972602739726026e-07, "log_odds_chosen": 1.661677598953247, "log_odds_ratio": -0.20524336397647858, "logits/chosen": 1.1800113916397095, "logits/rejected": 1.1490455865859985, "logps/chosen": -1.5988558530807495, "logps/rejected": -3.046701431274414, "loss": 0.5076, "nll_loss": 0.4871079623699188, "rewards/accuracies": 1.0, "rewards/chosen": -0.15988558530807495, "rewards/margins": 0.14478455483913422, "rewards/rejected": -0.30467015504837036, "step": 6061 }, { "epoch": 16.5968514715948, "grad_norm": 3.28353214263916, "learning_rate": 1.695890410958904e-07, "log_odds_chosen": 1.9283503293991089, "log_odds_ratio": -0.23160037398338318, "logits/chosen": 1.087787389755249, "logits/rejected": 1.0893429517745972, "logps/chosen": -1.5494199991226196, "logps/rejected": -3.2874667644500732, "loss": 0.48, "nll_loss": 0.45679372549057007, "rewards/accuracies": 1.0, "rewards/chosen": -0.15494199097156525, "rewards/margins": 0.17380467057228088, "rewards/rejected": -0.32874664664268494, "step": 6062 }, { "epoch": 16.59958932238193, "grad_norm": 4.035678863525391, "learning_rate": 1.6945205479452054e-07, "log_odds_chosen": 2.4046919345855713, "log_odds_ratio": -0.16143332421779633, "logits/chosen": 1.3095173835754395, "logits/rejected": 1.3613085746765137, "logps/chosen": -1.853500247001648, "logps/rejected": -4.078733921051025, "loss": 0.507, "nll_loss": 0.49084708094596863, "rewards/accuracies": 1.0, "rewards/chosen": -0.18535003066062927, "rewards/margins": 0.22252339124679565, "rewards/rejected": -0.4078734219074249, "step": 6063 }, { "epoch": 16.602327173169062, "grad_norm": 4.3990349769592285, "learning_rate": 1.6931506849315067e-07, "log_odds_chosen": 4.489635944366455, "log_odds_ratio": -0.214929461479187, "logits/chosen": 1.1665574312210083, "logits/rejected": 1.159844994544983, "logps/chosen": -1.9331222772598267, "logps/rejected": -6.308223724365234, "loss": 0.5922, "nll_loss": 0.5706770420074463, "rewards/accuracies": 1.0, "rewards/chosen": -0.19331224262714386, "rewards/margins": 0.43751010298728943, "rewards/rejected": -0.6308223605155945, "step": 6064 }, { "epoch": 16.605065023956193, "grad_norm": 8.152033805847168, "learning_rate": 1.6917808219178082e-07, "log_odds_chosen": 1.32303786277771, "log_odds_ratio": -0.8160257339477539, "logits/chosen": 1.2584489583969116, "logits/rejected": 1.3614133596420288, "logps/chosen": -3.0049357414245605, "logps/rejected": -4.2344183921813965, "loss": 0.5968, "nll_loss": 0.5152078866958618, "rewards/accuracies": 0.875, "rewards/chosen": -0.30049359798431396, "rewards/margins": 0.12294826656579971, "rewards/rejected": -0.4234418570995331, "step": 6065 }, { "epoch": 16.607802874743328, "grad_norm": 3.4052298069000244, "learning_rate": 1.6904109589041097e-07, "log_odds_chosen": 2.81217622756958, "log_odds_ratio": -0.2083820402622223, "logits/chosen": 1.2159124612808228, "logits/rejected": 1.2340742349624634, "logps/chosen": -1.8785500526428223, "logps/rejected": -4.503303050994873, "loss": 0.45, "nll_loss": 0.42914333939552307, "rewards/accuracies": 1.0, "rewards/chosen": -0.18785500526428223, "rewards/margins": 0.26247528195381165, "rewards/rejected": -0.45033031702041626, "step": 6066 }, { "epoch": 16.61054072553046, "grad_norm": 6.484259605407715, "learning_rate": 1.6890410958904107e-07, "log_odds_chosen": 2.781623363494873, "log_odds_ratio": -0.3645055294036865, "logits/chosen": 1.2480368614196777, "logits/rejected": 1.2172784805297852, "logps/chosen": -1.867944359779358, "logps/rejected": -4.448294639587402, "loss": 0.5915, "nll_loss": 0.5550165772438049, "rewards/accuracies": 0.75, "rewards/chosen": -0.1867944598197937, "rewards/margins": 0.2580350637435913, "rewards/rejected": -0.444829523563385, "step": 6067 }, { "epoch": 16.61327857631759, "grad_norm": 3.7301394939422607, "learning_rate": 1.6876712328767122e-07, "log_odds_chosen": 1.2700855731964111, "log_odds_ratio": -0.29527831077575684, "logits/chosen": 1.1599888801574707, "logits/rejected": 1.1371017694473267, "logps/chosen": -1.6450419425964355, "logps/rejected": -2.765990734100342, "loss": 0.4699, "nll_loss": 0.44040071964263916, "rewards/accuracies": 0.875, "rewards/chosen": -0.16450420022010803, "rewards/margins": 0.11209487915039062, "rewards/rejected": -0.27659907937049866, "step": 6068 }, { "epoch": 16.61601642710472, "grad_norm": 3.7456412315368652, "learning_rate": 1.6863013698630135e-07, "log_odds_chosen": 3.6870460510253906, "log_odds_ratio": -0.09619970619678497, "logits/chosen": 0.8722937107086182, "logits/rejected": 0.765451192855835, "logps/chosen": -1.3948535919189453, "logps/rejected": -4.721269607543945, "loss": 0.53, "nll_loss": 0.5203601121902466, "rewards/accuracies": 1.0, "rewards/chosen": -0.13948535919189453, "rewards/margins": 0.3326416313648224, "rewards/rejected": -0.4721269905567169, "step": 6069 }, { "epoch": 16.618754277891856, "grad_norm": 3.9739227294921875, "learning_rate": 1.684931506849315e-07, "log_odds_chosen": 3.2626938819885254, "log_odds_ratio": -0.16333584487438202, "logits/chosen": 1.096817135810852, "logits/rejected": 1.1407582759857178, "logps/chosen": -2.047708034515381, "logps/rejected": -5.104373455047607, "loss": 0.6485, "nll_loss": 0.6321513652801514, "rewards/accuracies": 1.0, "rewards/chosen": -0.20477080345153809, "rewards/margins": 0.3056665360927582, "rewards/rejected": -0.5104373693466187, "step": 6070 }, { "epoch": 16.621492128678987, "grad_norm": 3.6138999462127686, "learning_rate": 1.6835616438356165e-07, "log_odds_chosen": 2.8790626525878906, "log_odds_ratio": -0.14304903149604797, "logits/chosen": 1.2313611507415771, "logits/rejected": 1.2478663921356201, "logps/chosen": -2.418656349182129, "logps/rejected": -5.18407678604126, "loss": 0.5455, "nll_loss": 0.5311646461486816, "rewards/accuracies": 1.0, "rewards/chosen": -0.2418656349182129, "rewards/margins": 0.2765420079231262, "rewards/rejected": -0.5184077024459839, "step": 6071 }, { "epoch": 16.62422997946612, "grad_norm": 4.055086135864258, "learning_rate": 1.6821917808219178e-07, "log_odds_chosen": 3.7367095947265625, "log_odds_ratio": -0.16601590812206268, "logits/chosen": 1.0981661081314087, "logits/rejected": 1.2216428518295288, "logps/chosen": -2.2444236278533936, "logps/rejected": -5.855284690856934, "loss": 0.6131, "nll_loss": 0.5964864492416382, "rewards/accuracies": 1.0, "rewards/chosen": -0.22444237768650055, "rewards/margins": 0.3610861301422119, "rewards/rejected": -0.5855284929275513, "step": 6072 }, { "epoch": 16.62696783025325, "grad_norm": 3.682725667953491, "learning_rate": 1.6808219178082193e-07, "log_odds_chosen": 2.8288614749908447, "log_odds_ratio": -0.10819826275110245, "logits/chosen": 1.0904874801635742, "logits/rejected": 1.1207808256149292, "logps/chosen": -1.8509976863861084, "logps/rejected": -4.448185443878174, "loss": 0.4554, "nll_loss": 0.4445914924144745, "rewards/accuracies": 1.0, "rewards/chosen": -0.1850997805595398, "rewards/margins": 0.25971871614456177, "rewards/rejected": -0.44481852650642395, "step": 6073 }, { "epoch": 16.629705681040384, "grad_norm": 3.4132983684539795, "learning_rate": 1.6794520547945203e-07, "log_odds_chosen": 3.3299989700317383, "log_odds_ratio": -0.20898348093032837, "logits/chosen": 1.1543532609939575, "logits/rejected": 1.1799226999282837, "logps/chosen": -1.3392809629440308, "logps/rejected": -4.401759624481201, "loss": 0.4858, "nll_loss": 0.46486279368400574, "rewards/accuracies": 0.875, "rewards/chosen": -0.1339280903339386, "rewards/margins": 0.30624786019325256, "rewards/rejected": -0.44017598032951355, "step": 6074 }, { "epoch": 16.632443531827516, "grad_norm": 7.24567985534668, "learning_rate": 1.6780821917808218e-07, "log_odds_chosen": 2.401305675506592, "log_odds_ratio": -0.38482794165611267, "logits/chosen": 0.9467660188674927, "logits/rejected": 0.8787683248519897, "logps/chosen": -2.1145081520080566, "logps/rejected": -4.352198600769043, "loss": 0.6431, "nll_loss": 0.6046052575111389, "rewards/accuracies": 0.875, "rewards/chosen": -0.21145081520080566, "rewards/margins": 0.22376905381679535, "rewards/rejected": -0.4352198839187622, "step": 6075 }, { "epoch": 16.635181382614647, "grad_norm": 3.789687156677246, "learning_rate": 1.676712328767123e-07, "log_odds_chosen": 4.485386848449707, "log_odds_ratio": -0.08192525804042816, "logits/chosen": 1.1695719957351685, "logits/rejected": 1.1977508068084717, "logps/chosen": -1.736717700958252, "logps/rejected": -6.003880500793457, "loss": 0.555, "nll_loss": 0.54685378074646, "rewards/accuracies": 1.0, "rewards/chosen": -0.17367176711559296, "rewards/margins": 0.42671629786491394, "rewards/rejected": -0.6003881096839905, "step": 6076 }, { "epoch": 16.637919233401778, "grad_norm": 3.974937677383423, "learning_rate": 1.6753424657534246e-07, "log_odds_chosen": 3.3475024700164795, "log_odds_ratio": -0.41182276606559753, "logits/chosen": 0.8263946771621704, "logits/rejected": 0.8553973436355591, "logps/chosen": -1.7075121402740479, "logps/rejected": -4.85560417175293, "loss": 0.5157, "nll_loss": 0.47447019815444946, "rewards/accuracies": 0.875, "rewards/chosen": -0.17075121402740479, "rewards/margins": 0.31480923295021057, "rewards/rejected": -0.48556041717529297, "step": 6077 }, { "epoch": 16.640657084188913, "grad_norm": 3.47725510597229, "learning_rate": 1.673972602739726e-07, "log_odds_chosen": 2.548851490020752, "log_odds_ratio": -0.1622523069381714, "logits/chosen": 1.1249797344207764, "logits/rejected": 1.1333872079849243, "logps/chosen": -1.6998100280761719, "logps/rejected": -4.073626518249512, "loss": 0.5399, "nll_loss": 0.5236256122589111, "rewards/accuracies": 1.0, "rewards/chosen": -0.1699810028076172, "rewards/margins": 0.2373816817998886, "rewards/rejected": -0.407362699508667, "step": 6078 }, { "epoch": 16.643394934976044, "grad_norm": 7.142638206481934, "learning_rate": 1.6726027397260274e-07, "log_odds_chosen": 1.631919264793396, "log_odds_ratio": -0.8214350938796997, "logits/chosen": 0.9982584714889526, "logits/rejected": 1.0979783535003662, "logps/chosen": -2.718751907348633, "logps/rejected": -4.240284442901611, "loss": 0.6189, "nll_loss": 0.536787211894989, "rewards/accuracies": 0.75, "rewards/chosen": -0.27187520265579224, "rewards/margins": 0.15215322375297546, "rewards/rejected": -0.4240284562110901, "step": 6079 }, { "epoch": 16.646132785763175, "grad_norm": 3.5310022830963135, "learning_rate": 1.671232876712329e-07, "log_odds_chosen": 2.7205138206481934, "log_odds_ratio": -0.18240343034267426, "logits/chosen": 1.3868626356124878, "logits/rejected": 1.3505988121032715, "logps/chosen": -1.5808062553405762, "logps/rejected": -4.084718704223633, "loss": 0.4679, "nll_loss": 0.4496225118637085, "rewards/accuracies": 1.0, "rewards/chosen": -0.15808063745498657, "rewards/margins": 0.2503912150859833, "rewards/rejected": -0.40847188234329224, "step": 6080 }, { "epoch": 16.648870636550306, "grad_norm": 3.0442817211151123, "learning_rate": 1.66986301369863e-07, "log_odds_chosen": 3.7495157718658447, "log_odds_ratio": -0.09241710603237152, "logits/chosen": 1.1176131963729858, "logits/rejected": 1.1539466381072998, "logps/chosen": -1.9707841873168945, "logps/rejected": -5.54047966003418, "loss": 0.5594, "nll_loss": 0.5501083731651306, "rewards/accuracies": 1.0, "rewards/chosen": -0.19707843661308289, "rewards/margins": 0.35696956515312195, "rewards/rejected": -0.5540479421615601, "step": 6081 }, { "epoch": 16.65160848733744, "grad_norm": 3.334300994873047, "learning_rate": 1.6684931506849314e-07, "log_odds_chosen": 1.7106914520263672, "log_odds_ratio": -0.2740481495857239, "logits/chosen": 1.0214393138885498, "logits/rejected": 0.9841562509536743, "logps/chosen": -1.9349846839904785, "logps/rejected": -3.528181552886963, "loss": 0.6057, "nll_loss": 0.5782848596572876, "rewards/accuracies": 0.875, "rewards/chosen": -0.19349849224090576, "rewards/margins": 0.159319669008255, "rewards/rejected": -0.35281816124916077, "step": 6082 }, { "epoch": 16.654346338124572, "grad_norm": 3.4067933559417725, "learning_rate": 1.6671232876712327e-07, "log_odds_chosen": 3.69223690032959, "log_odds_ratio": -0.10158662497997284, "logits/chosen": 1.1033568382263184, "logits/rejected": 1.01798415184021, "logps/chosen": -1.7995775938034058, "logps/rejected": -5.059694290161133, "loss": 0.5461, "nll_loss": 0.5359646677970886, "rewards/accuracies": 1.0, "rewards/chosen": -0.17995776236057281, "rewards/margins": 0.32601165771484375, "rewards/rejected": -0.5059694647789001, "step": 6083 }, { "epoch": 16.657084188911703, "grad_norm": 4.238709926605225, "learning_rate": 1.6657534246575342e-07, "log_odds_chosen": 2.198354721069336, "log_odds_ratio": -0.38374292850494385, "logits/chosen": 1.2908893823623657, "logits/rejected": 1.2863047122955322, "logps/chosen": -2.049173355102539, "logps/rejected": -4.125661373138428, "loss": 0.6066, "nll_loss": 0.5682041645050049, "rewards/accuracies": 0.75, "rewards/chosen": -0.20491734147071838, "rewards/margins": 0.20764882862567902, "rewards/rejected": -0.4125661551952362, "step": 6084 }, { "epoch": 16.659822039698838, "grad_norm": 3.4288413524627686, "learning_rate": 1.6643835616438357e-07, "log_odds_chosen": 2.515775203704834, "log_odds_ratio": -0.35490989685058594, "logits/chosen": 1.2264782190322876, "logits/rejected": 1.2299774885177612, "logps/chosen": -1.8017796277999878, "logps/rejected": -4.243516445159912, "loss": 0.4846, "nll_loss": 0.4491136372089386, "rewards/accuracies": 0.75, "rewards/chosen": -0.1801779717206955, "rewards/margins": 0.24417367577552795, "rewards/rejected": -0.42435163259506226, "step": 6085 }, { "epoch": 16.66255989048597, "grad_norm": 3.1983578205108643, "learning_rate": 1.663013698630137e-07, "log_odds_chosen": 5.1803717613220215, "log_odds_ratio": -0.10919122397899628, "logits/chosen": 1.1959689855575562, "logits/rejected": 1.2339694499969482, "logps/chosen": -1.5275003910064697, "logps/rejected": -6.413212299346924, "loss": 0.483, "nll_loss": 0.4720853567123413, "rewards/accuracies": 1.0, "rewards/chosen": -0.15275004506111145, "rewards/margins": 0.4885711669921875, "rewards/rejected": -0.6413211822509766, "step": 6086 }, { "epoch": 16.6652977412731, "grad_norm": 3.439850330352783, "learning_rate": 1.6616438356164382e-07, "log_odds_chosen": 3.3673675060272217, "log_odds_ratio": -0.1903829127550125, "logits/chosen": 1.2670480012893677, "logits/rejected": 1.2135095596313477, "logps/chosen": -1.6232922077178955, "logps/rejected": -4.77180290222168, "loss": 0.4335, "nll_loss": 0.41442370414733887, "rewards/accuracies": 1.0, "rewards/chosen": -0.16232922673225403, "rewards/margins": 0.3148510456085205, "rewards/rejected": -0.47718024253845215, "step": 6087 }, { "epoch": 16.66803559206023, "grad_norm": 5.207800388336182, "learning_rate": 1.6602739726027395e-07, "log_odds_chosen": 1.681084156036377, "log_odds_ratio": -0.4501029849052429, "logits/chosen": 1.1083892583847046, "logits/rejected": 1.1106338500976562, "logps/chosen": -1.7157647609710693, "logps/rejected": -3.1426572799682617, "loss": 0.5745, "nll_loss": 0.5295156836509705, "rewards/accuracies": 0.875, "rewards/chosen": -0.17157647013664246, "rewards/margins": 0.14268922805786133, "rewards/rejected": -0.3142656981945038, "step": 6088 }, { "epoch": 16.670773442847366, "grad_norm": 3.647840738296509, "learning_rate": 1.658904109589041e-07, "log_odds_chosen": 2.3993496894836426, "log_odds_ratio": -0.18204262852668762, "logits/chosen": 1.258095622062683, "logits/rejected": 1.2406082153320312, "logps/chosen": -1.821999192237854, "logps/rejected": -4.041017055511475, "loss": 0.4888, "nll_loss": 0.4706398546695709, "rewards/accuracies": 1.0, "rewards/chosen": -0.18219992518424988, "rewards/margins": 0.2219017893075943, "rewards/rejected": -0.404101699590683, "step": 6089 }, { "epoch": 16.673511293634498, "grad_norm": 3.7748472690582275, "learning_rate": 1.6575342465753425e-07, "log_odds_chosen": 3.2618560791015625, "log_odds_ratio": -0.2106248438358307, "logits/chosen": 0.8260279893875122, "logits/rejected": 0.7544447779655457, "logps/chosen": -2.057058334350586, "logps/rejected": -5.102214813232422, "loss": 0.5124, "nll_loss": 0.491355299949646, "rewards/accuracies": 1.0, "rewards/chosen": -0.20570582151412964, "rewards/margins": 0.30451565980911255, "rewards/rejected": -0.5102214813232422, "step": 6090 }, { "epoch": 16.67624914442163, "grad_norm": 3.7409121990203857, "learning_rate": 1.6561643835616438e-07, "log_odds_chosen": 2.034397602081299, "log_odds_ratio": -0.2188342809677124, "logits/chosen": 1.1958892345428467, "logits/rejected": 1.0681158304214478, "logps/chosen": -1.7930328845977783, "logps/rejected": -3.6345314979553223, "loss": 0.5148, "nll_loss": 0.49291402101516724, "rewards/accuracies": 1.0, "rewards/chosen": -0.17930331826210022, "rewards/margins": 0.184149831533432, "rewards/rejected": -0.3634531497955322, "step": 6091 }, { "epoch": 16.67898699520876, "grad_norm": 3.849609851837158, "learning_rate": 1.6547945205479453e-07, "log_odds_chosen": 1.5788187980651855, "log_odds_ratio": -0.29207876324653625, "logits/chosen": 0.9382195472717285, "logits/rejected": 0.9465621709823608, "logps/chosen": -1.6938453912734985, "logps/rejected": -3.105233669281006, "loss": 0.4916, "nll_loss": 0.4623938500881195, "rewards/accuracies": 0.875, "rewards/chosen": -0.16938453912734985, "rewards/margins": 0.14113883674144745, "rewards/rejected": -0.3105233907699585, "step": 6092 }, { "epoch": 16.681724845995895, "grad_norm": 5.739569664001465, "learning_rate": 1.6534246575342465e-07, "log_odds_chosen": 2.39833402633667, "log_odds_ratio": -0.4582121968269348, "logits/chosen": 1.2241041660308838, "logits/rejected": 1.2709496021270752, "logps/chosen": -2.767246723175049, "logps/rejected": -5.122352600097656, "loss": 0.6602, "nll_loss": 0.6143606901168823, "rewards/accuracies": 0.75, "rewards/chosen": -0.2767246961593628, "rewards/margins": 0.23551055788993835, "rewards/rejected": -0.5122352242469788, "step": 6093 }, { "epoch": 16.684462696783026, "grad_norm": 3.7085394859313965, "learning_rate": 1.6520547945205478e-07, "log_odds_chosen": 2.203084945678711, "log_odds_ratio": -0.2340575009584427, "logits/chosen": 0.8701191544532776, "logits/rejected": 0.8728342652320862, "logps/chosen": -1.32029390335083, "logps/rejected": -3.158445358276367, "loss": 0.4198, "nll_loss": 0.3963494896888733, "rewards/accuracies": 1.0, "rewards/chosen": -0.13202939927577972, "rewards/margins": 0.183815136551857, "rewards/rejected": -0.3158445358276367, "step": 6094 }, { "epoch": 16.687200547570157, "grad_norm": 3.990351438522339, "learning_rate": 1.650684931506849e-07, "log_odds_chosen": 2.2193024158477783, "log_odds_ratio": -0.19386425614356995, "logits/chosen": 1.172044038772583, "logits/rejected": 1.151895523071289, "logps/chosen": -1.9054577350616455, "logps/rejected": -3.8387746810913086, "loss": 0.5007, "nll_loss": 0.4812997281551361, "rewards/accuracies": 1.0, "rewards/chosen": -0.19054579734802246, "rewards/margins": 0.19333168864250183, "rewards/rejected": -0.3838774859905243, "step": 6095 }, { "epoch": 16.689938398357288, "grad_norm": 5.279554843902588, "learning_rate": 1.6493150684931506e-07, "log_odds_chosen": 1.9196410179138184, "log_odds_ratio": -0.24478282034397125, "logits/chosen": 1.08396577835083, "logits/rejected": 0.9434080123901367, "logps/chosen": -2.324990749359131, "logps/rejected": -4.062143325805664, "loss": 0.5371, "nll_loss": 0.5126374959945679, "rewards/accuracies": 0.875, "rewards/chosen": -0.23249909281730652, "rewards/margins": 0.1737152487039566, "rewards/rejected": -0.40621429681777954, "step": 6096 }, { "epoch": 16.692676249144423, "grad_norm": 3.709620952606201, "learning_rate": 1.647945205479452e-07, "log_odds_chosen": 2.0975112915039062, "log_odds_ratio": -0.20227345824241638, "logits/chosen": 0.8937612771987915, "logits/rejected": 0.8472464680671692, "logps/chosen": -1.5276763439178467, "logps/rejected": -3.3921663761138916, "loss": 0.5294, "nll_loss": 0.509166419506073, "rewards/accuracies": 1.0, "rewards/chosen": -0.1527676284313202, "rewards/margins": 0.18644902110099792, "rewards/rejected": -0.3392166495323181, "step": 6097 }, { "epoch": 16.695414099931554, "grad_norm": 3.847825527191162, "learning_rate": 1.6465753424657534e-07, "log_odds_chosen": 3.862277030944824, "log_odds_ratio": -0.17488637566566467, "logits/chosen": 0.9740695953369141, "logits/rejected": 0.9613035321235657, "logps/chosen": -1.959653377532959, "logps/rejected": -5.661303520202637, "loss": 0.5133, "nll_loss": 0.49583935737609863, "rewards/accuracies": 1.0, "rewards/chosen": -0.19596534967422485, "rewards/margins": 0.37016499042510986, "rewards/rejected": -0.5661302804946899, "step": 6098 }, { "epoch": 16.698151950718685, "grad_norm": 3.4491846561431885, "learning_rate": 1.645205479452055e-07, "log_odds_chosen": 1.7557556629180908, "log_odds_ratio": -0.3145638406276703, "logits/chosen": 0.8921401500701904, "logits/rejected": 0.8913400173187256, "logps/chosen": -1.7602574825286865, "logps/rejected": -3.3691506385803223, "loss": 0.487, "nll_loss": 0.4555315673351288, "rewards/accuracies": 1.0, "rewards/chosen": -0.17602577805519104, "rewards/margins": 0.16088929772377014, "rewards/rejected": -0.3369150459766388, "step": 6099 }, { "epoch": 16.700889801505816, "grad_norm": 4.810916423797607, "learning_rate": 1.643835616438356e-07, "log_odds_chosen": 3.5897624492645264, "log_odds_ratio": -0.35681527853012085, "logits/chosen": 0.999154269695282, "logits/rejected": 0.9398698806762695, "logps/chosen": -1.7842092514038086, "logps/rejected": -5.221958160400391, "loss": 0.5578, "nll_loss": 0.5221214890480042, "rewards/accuracies": 0.875, "rewards/chosen": -0.17842093110084534, "rewards/margins": 0.3437749445438385, "rewards/rejected": -0.5221958160400391, "step": 6100 }, { "epoch": 16.70362765229295, "grad_norm": 6.000210762023926, "learning_rate": 1.6424657534246574e-07, "log_odds_chosen": 1.0299293994903564, "log_odds_ratio": -0.44287651777267456, "logits/chosen": 1.1157768964767456, "logits/rejected": 1.057578682899475, "logps/chosen": -1.9421125650405884, "logps/rejected": -2.7858498096466064, "loss": 0.474, "nll_loss": 0.4297354221343994, "rewards/accuracies": 0.875, "rewards/chosen": -0.19421127438545227, "rewards/margins": 0.08437371999025345, "rewards/rejected": -0.2785849869251251, "step": 6101 }, { "epoch": 16.706365503080082, "grad_norm": 4.2308030128479, "learning_rate": 1.6410958904109586e-07, "log_odds_chosen": 2.1026220321655273, "log_odds_ratio": -0.22721660137176514, "logits/chosen": 1.2393465042114258, "logits/rejected": 1.2930289506912231, "logps/chosen": -2.27534818649292, "logps/rejected": -4.225200653076172, "loss": 0.5217, "nll_loss": 0.4989777207374573, "rewards/accuracies": 1.0, "rewards/chosen": -0.22753483057022095, "rewards/margins": 0.1949852555990219, "rewards/rejected": -0.42252007126808167, "step": 6102 }, { "epoch": 16.709103353867214, "grad_norm": 4.404942512512207, "learning_rate": 1.6397260273972602e-07, "log_odds_chosen": 4.746928691864014, "log_odds_ratio": -0.1573522388935089, "logits/chosen": 1.2097156047821045, "logits/rejected": 1.2826112508773804, "logps/chosen": -3.190441608428955, "logps/rejected": -7.851820945739746, "loss": 0.6286, "nll_loss": 0.6128430366516113, "rewards/accuracies": 0.875, "rewards/chosen": -0.31904417276382446, "rewards/margins": 0.46613791584968567, "rewards/rejected": -0.7851820588111877, "step": 6103 }, { "epoch": 16.711841204654345, "grad_norm": 3.330049991607666, "learning_rate": 1.6383561643835617e-07, "log_odds_chosen": 4.3708977699279785, "log_odds_ratio": -0.18178078532218933, "logits/chosen": 1.000251054763794, "logits/rejected": 1.0090982913970947, "logps/chosen": -2.1704108715057373, "logps/rejected": -6.442445755004883, "loss": 0.5788, "nll_loss": 0.5605879426002502, "rewards/accuracies": 1.0, "rewards/chosen": -0.21704109013080597, "rewards/margins": 0.4272034764289856, "rewards/rejected": -0.6442446112632751, "step": 6104 }, { "epoch": 16.71457905544148, "grad_norm": 3.8379220962524414, "learning_rate": 1.636986301369863e-07, "log_odds_chosen": 4.1334991455078125, "log_odds_ratio": -0.15981902182102203, "logits/chosen": 1.2056748867034912, "logits/rejected": 1.2063591480255127, "logps/chosen": -1.7854801416397095, "logps/rejected": -5.722630500793457, "loss": 0.5477, "nll_loss": 0.5317288637161255, "rewards/accuracies": 1.0, "rewards/chosen": -0.17854800820350647, "rewards/margins": 0.3937150537967682, "rewards/rejected": -0.5722630620002747, "step": 6105 }, { "epoch": 16.71731690622861, "grad_norm": 4.089118957519531, "learning_rate": 1.6356164383561645e-07, "log_odds_chosen": 3.812915086746216, "log_odds_ratio": -0.29580575227737427, "logits/chosen": 1.3092001676559448, "logits/rejected": 1.2977159023284912, "logps/chosen": -1.6006566286087036, "logps/rejected": -5.210913181304932, "loss": 0.5347, "nll_loss": 0.5050874352455139, "rewards/accuracies": 0.875, "rewards/chosen": -0.1600656807422638, "rewards/margins": 0.36102569103240967, "rewards/rejected": -0.5210913419723511, "step": 6106 }, { "epoch": 16.720054757015742, "grad_norm": 7.751184463500977, "learning_rate": 1.6342465753424655e-07, "log_odds_chosen": 2.450077533721924, "log_odds_ratio": -0.16801021993160248, "logits/chosen": 1.0235811471939087, "logits/rejected": 0.8933829069137573, "logps/chosen": -2.0842950344085693, "logps/rejected": -4.368302345275879, "loss": 0.5418, "nll_loss": 0.52501380443573, "rewards/accuracies": 1.0, "rewards/chosen": -0.2084295153617859, "rewards/margins": 0.22840075194835663, "rewards/rejected": -0.43683022260665894, "step": 6107 }, { "epoch": 16.722792607802873, "grad_norm": 3.443913698196411, "learning_rate": 1.632876712328767e-07, "log_odds_chosen": 2.8564159870147705, "log_odds_ratio": -0.16084319353103638, "logits/chosen": 1.026004672050476, "logits/rejected": 1.0415147542953491, "logps/chosen": -1.6441890001296997, "logps/rejected": -4.298466205596924, "loss": 0.5737, "nll_loss": 0.5575713515281677, "rewards/accuracies": 1.0, "rewards/chosen": -0.16441892087459564, "rewards/margins": 0.26542770862579346, "rewards/rejected": -0.4298466145992279, "step": 6108 }, { "epoch": 16.725530458590008, "grad_norm": 3.2876503467559814, "learning_rate": 1.6315068493150685e-07, "log_odds_chosen": 2.982254981994629, "log_odds_ratio": -0.2576436996459961, "logits/chosen": 1.1392467021942139, "logits/rejected": 1.1474595069885254, "logps/chosen": -1.6497688293457031, "logps/rejected": -4.466543197631836, "loss": 0.4847, "nll_loss": 0.4589086174964905, "rewards/accuracies": 0.875, "rewards/chosen": -0.16497687995433807, "rewards/margins": 0.28167739510536194, "rewards/rejected": -0.4466543197631836, "step": 6109 }, { "epoch": 16.72826830937714, "grad_norm": 3.832564115524292, "learning_rate": 1.6301369863013698e-07, "log_odds_chosen": 2.83241868019104, "log_odds_ratio": -0.1893267184495926, "logits/chosen": 1.2845951318740845, "logits/rejected": 1.2608000040054321, "logps/chosen": -1.6237587928771973, "logps/rejected": -4.206667423248291, "loss": 0.4662, "nll_loss": 0.4472741186618805, "rewards/accuracies": 1.0, "rewards/chosen": -0.16237586736679077, "rewards/margins": 0.2582908570766449, "rewards/rejected": -0.42066672444343567, "step": 6110 }, { "epoch": 16.73100616016427, "grad_norm": 3.6822903156280518, "learning_rate": 1.6287671232876713e-07, "log_odds_chosen": 3.835306406021118, "log_odds_ratio": -0.1286129504442215, "logits/chosen": 1.1798436641693115, "logits/rejected": 1.2685465812683105, "logps/chosen": -2.1447296142578125, "logps/rejected": -5.861398696899414, "loss": 0.543, "nll_loss": 0.5301305651664734, "rewards/accuracies": 0.875, "rewards/chosen": -0.2144729495048523, "rewards/margins": 0.37166693806648254, "rewards/rejected": -0.5861399173736572, "step": 6111 }, { "epoch": 16.733744010951405, "grad_norm": 3.282442569732666, "learning_rate": 1.6273972602739725e-07, "log_odds_chosen": 3.662923812866211, "log_odds_ratio": -0.1235492080450058, "logits/chosen": 1.0215164422988892, "logits/rejected": 1.0108500719070435, "logps/chosen": -1.7696932554244995, "logps/rejected": -5.160627841949463, "loss": 0.4774, "nll_loss": 0.46505993604660034, "rewards/accuracies": 1.0, "rewards/chosen": -0.17696931958198547, "rewards/margins": 0.3390934467315674, "rewards/rejected": -0.5160627365112305, "step": 6112 }, { "epoch": 16.736481861738536, "grad_norm": 5.515312194824219, "learning_rate": 1.626027397260274e-07, "log_odds_chosen": 3.8609910011291504, "log_odds_ratio": -0.23089838027954102, "logits/chosen": 1.1924560070037842, "logits/rejected": 1.2183445692062378, "logps/chosen": -2.7632622718811035, "logps/rejected": -6.530673027038574, "loss": 0.6319, "nll_loss": 0.6088478565216064, "rewards/accuracies": 0.875, "rewards/chosen": -0.2763262391090393, "rewards/margins": 0.37674105167388916, "rewards/rejected": -0.6530672311782837, "step": 6113 }, { "epoch": 16.739219712525667, "grad_norm": 4.074826240539551, "learning_rate": 1.624657534246575e-07, "log_odds_chosen": 3.396512746810913, "log_odds_ratio": -0.08326516300439835, "logits/chosen": 1.324450969696045, "logits/rejected": 1.3733012676239014, "logps/chosen": -2.418468713760376, "logps/rejected": -5.681231498718262, "loss": 0.6258, "nll_loss": 0.6174710988998413, "rewards/accuracies": 1.0, "rewards/chosen": -0.24184687435626984, "rewards/margins": 0.3262763023376465, "rewards/rejected": -0.5681232213973999, "step": 6114 }, { "epoch": 16.7419575633128, "grad_norm": 3.708538770675659, "learning_rate": 1.6232876712328766e-07, "log_odds_chosen": 2.548736572265625, "log_odds_ratio": -0.26372087001800537, "logits/chosen": 1.1846952438354492, "logits/rejected": 1.2048401832580566, "logps/chosen": -1.8545756340026855, "logps/rejected": -4.292577266693115, "loss": 0.4643, "nll_loss": 0.4378924071788788, "rewards/accuracies": 0.75, "rewards/chosen": -0.18545755743980408, "rewards/margins": 0.24380016326904297, "rewards/rejected": -0.42925775051116943, "step": 6115 }, { "epoch": 16.744695414099933, "grad_norm": 4.046126842498779, "learning_rate": 1.621917808219178e-07, "log_odds_chosen": 2.6982998847961426, "log_odds_ratio": -0.20107397437095642, "logits/chosen": 1.1692111492156982, "logits/rejected": 1.1506516933441162, "logps/chosen": -2.06064772605896, "logps/rejected": -4.591800689697266, "loss": 0.5486, "nll_loss": 0.5284531116485596, "rewards/accuracies": 1.0, "rewards/chosen": -0.20606477558612823, "rewards/margins": 0.25311529636383057, "rewards/rejected": -0.45918008685112, "step": 6116 }, { "epoch": 16.747433264887064, "grad_norm": 3.8774917125701904, "learning_rate": 1.6205479452054793e-07, "log_odds_chosen": 2.866827964782715, "log_odds_ratio": -0.1775335967540741, "logits/chosen": 1.1368954181671143, "logits/rejected": 1.1878697872161865, "logps/chosen": -2.404284954071045, "logps/rejected": -5.162588119506836, "loss": 0.5417, "nll_loss": 0.5239540934562683, "rewards/accuracies": 1.0, "rewards/chosen": -0.24042847752571106, "rewards/margins": 0.27583029866218567, "rewards/rejected": -0.5162587761878967, "step": 6117 }, { "epoch": 16.750171115674195, "grad_norm": 3.168800115585327, "learning_rate": 1.6191780821917809e-07, "log_odds_chosen": 4.646500587463379, "log_odds_ratio": -0.08465568721294403, "logits/chosen": 1.3244433403015137, "logits/rejected": 1.2936140298843384, "logps/chosen": -1.3227869272232056, "logps/rejected": -5.66873836517334, "loss": 0.5115, "nll_loss": 0.5030624866485596, "rewards/accuracies": 1.0, "rewards/chosen": -0.1322786957025528, "rewards/margins": 0.43459516763687134, "rewards/rejected": -0.5668738484382629, "step": 6118 }, { "epoch": 16.752908966461327, "grad_norm": 3.8026201725006104, "learning_rate": 1.617808219178082e-07, "log_odds_chosen": 3.1146793365478516, "log_odds_ratio": -0.12347577512264252, "logits/chosen": 1.29893958568573, "logits/rejected": 1.3635717630386353, "logps/chosen": -2.3875255584716797, "logps/rejected": -5.40829610824585, "loss": 0.5568, "nll_loss": 0.5444313287734985, "rewards/accuracies": 1.0, "rewards/chosen": -0.2387525737285614, "rewards/margins": 0.302077054977417, "rewards/rejected": -0.5408296585083008, "step": 6119 }, { "epoch": 16.75564681724846, "grad_norm": 3.373136520385742, "learning_rate": 1.6164383561643836e-07, "log_odds_chosen": 3.494946002960205, "log_odds_ratio": -0.15603160858154297, "logits/chosen": 0.8492143750190735, "logits/rejected": 0.8765866756439209, "logps/chosen": -1.559543251991272, "logps/rejected": -4.815176010131836, "loss": 0.5183, "nll_loss": 0.5026475787162781, "rewards/accuracies": 1.0, "rewards/chosen": -0.15595433115959167, "rewards/margins": 0.3255632817745209, "rewards/rejected": -0.48151761293411255, "step": 6120 }, { "epoch": 16.758384668035593, "grad_norm": 3.691800355911255, "learning_rate": 1.615068493150685e-07, "log_odds_chosen": 4.097698211669922, "log_odds_ratio": -0.16505327820777893, "logits/chosen": 1.1495835781097412, "logits/rejected": 1.1297411918640137, "logps/chosen": -1.8341686725616455, "logps/rejected": -5.702631950378418, "loss": 0.5464, "nll_loss": 0.5299161076545715, "rewards/accuracies": 0.875, "rewards/chosen": -0.18341687321662903, "rewards/margins": 0.3868463635444641, "rewards/rejected": -0.570263147354126, "step": 6121 }, { "epoch": 16.761122518822724, "grad_norm": 3.1999082565307617, "learning_rate": 1.6136986301369861e-07, "log_odds_chosen": 2.101112127304077, "log_odds_ratio": -0.16552534699440002, "logits/chosen": 1.3470920324325562, "logits/rejected": 1.239888072013855, "logps/chosen": -1.7833220958709717, "logps/rejected": -3.677980422973633, "loss": 0.4532, "nll_loss": 0.4366559088230133, "rewards/accuracies": 1.0, "rewards/chosen": -0.17833220958709717, "rewards/margins": 0.18946585059165955, "rewards/rejected": -0.3677980303764343, "step": 6122 }, { "epoch": 16.763860369609855, "grad_norm": 3.6426267623901367, "learning_rate": 1.6123287671232877e-07, "log_odds_chosen": 2.5841140747070312, "log_odds_ratio": -0.13165989518165588, "logits/chosen": 0.8043161630630493, "logits/rejected": 0.7788159847259521, "logps/chosen": -1.6923503875732422, "logps/rejected": -4.07315731048584, "loss": 0.5947, "nll_loss": 0.5815292000770569, "rewards/accuracies": 1.0, "rewards/chosen": -0.16923503577709198, "rewards/margins": 0.2380807250738144, "rewards/rejected": -0.407315731048584, "step": 6123 }, { "epoch": 16.76659822039699, "grad_norm": 3.782827854156494, "learning_rate": 1.610958904109589e-07, "log_odds_chosen": 1.626167893409729, "log_odds_ratio": -0.3577829599380493, "logits/chosen": 0.8312908411026001, "logits/rejected": 0.8244740962982178, "logps/chosen": -1.4844865798950195, "logps/rejected": -3.0006117820739746, "loss": 0.521, "nll_loss": 0.4852217137813568, "rewards/accuracies": 0.875, "rewards/chosen": -0.14844867587089539, "rewards/margins": 0.1516125202178955, "rewards/rejected": -0.3000611960887909, "step": 6124 }, { "epoch": 16.76933607118412, "grad_norm": 7.048285961151123, "learning_rate": 1.6095890410958904e-07, "log_odds_chosen": 2.0239737033843994, "log_odds_ratio": -0.30153873562812805, "logits/chosen": 1.134720802307129, "logits/rejected": 1.144365906715393, "logps/chosen": -2.0749616622924805, "logps/rejected": -3.886143445968628, "loss": 0.5316, "nll_loss": 0.5014500617980957, "rewards/accuracies": 0.875, "rewards/chosen": -0.20749616622924805, "rewards/margins": 0.1811181902885437, "rewards/rejected": -0.38861435651779175, "step": 6125 }, { "epoch": 16.772073921971252, "grad_norm": 3.599691867828369, "learning_rate": 1.6082191780821917e-07, "log_odds_chosen": 3.3224472999572754, "log_odds_ratio": -0.11965209245681763, "logits/chosen": 1.2058371305465698, "logits/rejected": 1.2699143886566162, "logps/chosen": -1.7192800045013428, "logps/rejected": -4.82457971572876, "loss": 0.4666, "nll_loss": 0.4546833038330078, "rewards/accuracies": 1.0, "rewards/chosen": -0.17192798852920532, "rewards/margins": 0.3105299472808838, "rewards/rejected": -0.4824579656124115, "step": 6126 }, { "epoch": 16.774811772758383, "grad_norm": 3.6578352451324463, "learning_rate": 1.606849315068493e-07, "log_odds_chosen": 2.5601015090942383, "log_odds_ratio": -0.18255844712257385, "logits/chosen": 1.0891427993774414, "logits/rejected": 1.124091625213623, "logps/chosen": -1.9064501523971558, "logps/rejected": -4.227972984313965, "loss": 0.5923, "nll_loss": 0.5740021467208862, "rewards/accuracies": 1.0, "rewards/chosen": -0.1906450241804123, "rewards/margins": 0.2321522831916809, "rewards/rejected": -0.4227973222732544, "step": 6127 }, { "epoch": 16.777549623545518, "grad_norm": 4.068933486938477, "learning_rate": 1.6054794520547945e-07, "log_odds_chosen": 3.052535057067871, "log_odds_ratio": -0.41628870368003845, "logits/chosen": 0.977421760559082, "logits/rejected": 0.9428144693374634, "logps/chosen": -2.346883535385132, "logps/rejected": -5.281082630157471, "loss": 0.5871, "nll_loss": 0.5455201864242554, "rewards/accuracies": 0.75, "rewards/chosen": -0.23468837141990662, "rewards/margins": 0.29341989755630493, "rewards/rejected": -0.5281082987785339, "step": 6128 }, { "epoch": 16.78028747433265, "grad_norm": 9.245342254638672, "learning_rate": 1.6041095890410957e-07, "log_odds_chosen": 1.69380784034729, "log_odds_ratio": -0.4455747902393341, "logits/chosen": 0.899737536907196, "logits/rejected": 0.8214021921157837, "logps/chosen": -2.106241226196289, "logps/rejected": -3.629316806793213, "loss": 0.5548, "nll_loss": 0.5102832317352295, "rewards/accuracies": 0.875, "rewards/chosen": -0.21062412858009338, "rewards/margins": 0.15230756998062134, "rewards/rejected": -0.3629316985607147, "step": 6129 }, { "epoch": 16.78302532511978, "grad_norm": 7.0464558601379395, "learning_rate": 1.6027397260273973e-07, "log_odds_chosen": 3.4440581798553467, "log_odds_ratio": -0.5549899935722351, "logits/chosen": 1.0567188262939453, "logits/rejected": 1.0590673685073853, "logps/chosen": -2.0385539531707764, "logps/rejected": -5.252799987792969, "loss": 0.5325, "nll_loss": 0.4769969582557678, "rewards/accuracies": 0.875, "rewards/chosen": -0.20385539531707764, "rewards/margins": 0.3214246332645416, "rewards/rejected": -0.5252799987792969, "step": 6130 }, { "epoch": 16.78576317590691, "grad_norm": 4.017941474914551, "learning_rate": 1.6013698630136985e-07, "log_odds_chosen": 3.5332999229431152, "log_odds_ratio": -0.27944666147232056, "logits/chosen": 1.0454020500183105, "logits/rejected": 1.0777981281280518, "logps/chosen": -2.213731288909912, "logps/rejected": -5.623045921325684, "loss": 0.5158, "nll_loss": 0.4878999590873718, "rewards/accuracies": 0.875, "rewards/chosen": -0.22137311100959778, "rewards/margins": 0.3409314751625061, "rewards/rejected": -0.5623046159744263, "step": 6131 }, { "epoch": 16.788501026694046, "grad_norm": 4.920108318328857, "learning_rate": 1.6e-07, "log_odds_chosen": 3.5524892807006836, "log_odds_ratio": -0.17154090106487274, "logits/chosen": 1.1703550815582275, "logits/rejected": 1.1721978187561035, "logps/chosen": -2.4548592567443848, "logps/rejected": -5.877501487731934, "loss": 0.612, "nll_loss": 0.594891369342804, "rewards/accuracies": 1.0, "rewards/chosen": -0.24548593163490295, "rewards/margins": 0.34226420521736145, "rewards/rejected": -0.5877501964569092, "step": 6132 }, { "epoch": 16.791238877481177, "grad_norm": 4.011651039123535, "learning_rate": 1.5986301369863013e-07, "log_odds_chosen": 2.2832651138305664, "log_odds_ratio": -0.289800226688385, "logits/chosen": 1.4010740518569946, "logits/rejected": 1.3580549955368042, "logps/chosen": -1.5129984617233276, "logps/rejected": -3.5890538692474365, "loss": 0.4465, "nll_loss": 0.4175190031528473, "rewards/accuracies": 0.75, "rewards/chosen": -0.151299849152565, "rewards/margins": 0.20760555565357208, "rewards/rejected": -0.3589054346084595, "step": 6133 }, { "epoch": 16.79397672826831, "grad_norm": 4.053506374359131, "learning_rate": 1.5972602739726025e-07, "log_odds_chosen": 3.7921836376190186, "log_odds_ratio": -0.27884364128112793, "logits/chosen": 1.2902414798736572, "logits/rejected": 1.3001084327697754, "logps/chosen": -1.8468809127807617, "logps/rejected": -5.43055534362793, "loss": 0.5887, "nll_loss": 0.5608159303665161, "rewards/accuracies": 0.875, "rewards/chosen": -0.18468812108039856, "rewards/margins": 0.3583674728870392, "rewards/rejected": -0.543055534362793, "step": 6134 }, { "epoch": 16.79671457905544, "grad_norm": 4.379744052886963, "learning_rate": 1.595890410958904e-07, "log_odds_chosen": 3.081127643585205, "log_odds_ratio": -0.31894534826278687, "logits/chosen": 1.0280892848968506, "logits/rejected": 1.047959804534912, "logps/chosen": -1.932375192642212, "logps/rejected": -4.865741729736328, "loss": 0.5227, "nll_loss": 0.49080008268356323, "rewards/accuracies": 0.75, "rewards/chosen": -0.1932375133037567, "rewards/margins": 0.2933366894721985, "rewards/rejected": -0.4865742027759552, "step": 6135 }, { "epoch": 16.799452429842574, "grad_norm": 4.116270542144775, "learning_rate": 1.5945205479452053e-07, "log_odds_chosen": 1.9669803380966187, "log_odds_ratio": -0.2625695466995239, "logits/chosen": 0.9148827195167542, "logits/rejected": 0.850210428237915, "logps/chosen": -2.3871307373046875, "logps/rejected": -4.192404747009277, "loss": 0.6189, "nll_loss": 0.5926218032836914, "rewards/accuracies": 1.0, "rewards/chosen": -0.23871305584907532, "rewards/margins": 0.180527463555336, "rewards/rejected": -0.4192405045032501, "step": 6136 }, { "epoch": 16.802190280629706, "grad_norm": 4.452890396118164, "learning_rate": 1.5931506849315068e-07, "log_odds_chosen": 4.697579383850098, "log_odds_ratio": -0.20145924389362335, "logits/chosen": 1.2289372682571411, "logits/rejected": 1.3131132125854492, "logps/chosen": -2.381641149520874, "logps/rejected": -6.979578971862793, "loss": 0.5661, "nll_loss": 0.5459430813789368, "rewards/accuracies": 0.75, "rewards/chosen": -0.23816414177417755, "rewards/margins": 0.4597937762737274, "rewards/rejected": -0.6979578733444214, "step": 6137 }, { "epoch": 16.804928131416837, "grad_norm": 4.042647361755371, "learning_rate": 1.591780821917808e-07, "log_odds_chosen": 4.3027520179748535, "log_odds_ratio": -0.08692695200443268, "logits/chosen": 1.1601126194000244, "logits/rejected": 1.1777774095535278, "logps/chosen": -1.7475457191467285, "logps/rejected": -5.854165077209473, "loss": 0.5034, "nll_loss": 0.49468183517456055, "rewards/accuracies": 1.0, "rewards/chosen": -0.1747545748949051, "rewards/margins": 0.4106619656085968, "rewards/rejected": -0.5854165554046631, "step": 6138 }, { "epoch": 16.80766598220397, "grad_norm": 4.576094150543213, "learning_rate": 1.5904109589041096e-07, "log_odds_chosen": 2.453775644302368, "log_odds_ratio": -0.3109896183013916, "logits/chosen": 0.9761781096458435, "logits/rejected": 1.0290194749832153, "logps/chosen": -1.8888158798217773, "logps/rejected": -4.227643013000488, "loss": 0.4732, "nll_loss": 0.44209474325180054, "rewards/accuracies": 1.0, "rewards/chosen": -0.18888157606124878, "rewards/margins": 0.23388269543647766, "rewards/rejected": -0.42276430130004883, "step": 6139 }, { "epoch": 16.810403832991103, "grad_norm": 3.69494891166687, "learning_rate": 1.5890410958904111e-07, "log_odds_chosen": 3.6338610649108887, "log_odds_ratio": -0.20237791538238525, "logits/chosen": 1.054236650466919, "logits/rejected": 1.0254523754119873, "logps/chosen": -1.2944822311401367, "logps/rejected": -4.685615539550781, "loss": 0.4619, "nll_loss": 0.44170063734054565, "rewards/accuracies": 1.0, "rewards/chosen": -0.12944823503494263, "rewards/margins": 0.3391132950782776, "rewards/rejected": -0.4685615599155426, "step": 6140 }, { "epoch": 16.813141683778234, "grad_norm": 3.729496479034424, "learning_rate": 1.587671232876712e-07, "log_odds_chosen": 4.85052490234375, "log_odds_ratio": -0.07870495319366455, "logits/chosen": 1.377054214477539, "logits/rejected": 1.3797909021377563, "logps/chosen": -2.0217607021331787, "logps/rejected": -6.70412540435791, "loss": 0.5607, "nll_loss": 0.5528462529182434, "rewards/accuracies": 1.0, "rewards/chosen": -0.2021760791540146, "rewards/margins": 0.46823650598526, "rewards/rejected": -0.670412540435791, "step": 6141 }, { "epoch": 16.815879534565365, "grad_norm": 7.304934024810791, "learning_rate": 1.5863013698630137e-07, "log_odds_chosen": 3.094128131866455, "log_odds_ratio": -0.22003553807735443, "logits/chosen": 0.9483601450920105, "logits/rejected": 0.9272089004516602, "logps/chosen": -2.5762975215911865, "logps/rejected": -5.56085205078125, "loss": 0.5509, "nll_loss": 0.5288684368133545, "rewards/accuracies": 0.875, "rewards/chosen": -0.25762975215911865, "rewards/margins": 0.29845547676086426, "rewards/rejected": -0.5560852289199829, "step": 6142 }, { "epoch": 16.8186173853525, "grad_norm": 8.05478572845459, "learning_rate": 1.584931506849315e-07, "log_odds_chosen": 3.012629508972168, "log_odds_ratio": -0.5798195004463196, "logits/chosen": 0.9735898971557617, "logits/rejected": 1.0248997211456299, "logps/chosen": -2.5050106048583984, "logps/rejected": -5.435946464538574, "loss": 0.582, "nll_loss": 0.5239737629890442, "rewards/accuracies": 0.75, "rewards/chosen": -0.2505010664463043, "rewards/margins": 0.29309356212615967, "rewards/rejected": -0.5435946583747864, "step": 6143 }, { "epoch": 16.82135523613963, "grad_norm": 4.709331512451172, "learning_rate": 1.5835616438356164e-07, "log_odds_chosen": 2.051349401473999, "log_odds_ratio": -0.611224353313446, "logits/chosen": 1.1194944381713867, "logits/rejected": 1.1711736917495728, "logps/chosen": -1.9964869022369385, "logps/rejected": -3.8528382778167725, "loss": 0.5116, "nll_loss": 0.4504546821117401, "rewards/accuracies": 0.75, "rewards/chosen": -0.19964870810508728, "rewards/margins": 0.18563511967658997, "rewards/rejected": -0.38528382778167725, "step": 6144 }, { "epoch": 16.824093086926762, "grad_norm": 5.266636371612549, "learning_rate": 1.5821917808219177e-07, "log_odds_chosen": 1.584632396697998, "log_odds_ratio": -0.3016044497489929, "logits/chosen": 1.1337445974349976, "logits/rejected": 1.0656815767288208, "logps/chosen": -1.912248134613037, "logps/rejected": -3.3064889907836914, "loss": 0.478, "nll_loss": 0.44783684611320496, "rewards/accuracies": 1.0, "rewards/chosen": -0.1912248134613037, "rewards/margins": 0.13942407071590424, "rewards/rejected": -0.33064889907836914, "step": 6145 }, { "epoch": 16.826830937713893, "grad_norm": 5.257632732391357, "learning_rate": 1.5808219178082192e-07, "log_odds_chosen": 2.080629587173462, "log_odds_ratio": -0.2756679356098175, "logits/chosen": 1.370008945465088, "logits/rejected": 1.3267314434051514, "logps/chosen": -2.1162185668945312, "logps/rejected": -4.08240270614624, "loss": 0.5787, "nll_loss": 0.5511168241500854, "rewards/accuracies": 1.0, "rewards/chosen": -0.21162188053131104, "rewards/margins": 0.19661840796470642, "rewards/rejected": -0.40824025869369507, "step": 6146 }, { "epoch": 16.829568788501028, "grad_norm": 6.085917949676514, "learning_rate": 1.5794520547945205e-07, "log_odds_chosen": 2.4244673252105713, "log_odds_ratio": -0.18110762536525726, "logits/chosen": 0.8223786354064941, "logits/rejected": 0.6667196750640869, "logps/chosen": -2.1449670791625977, "logps/rejected": -4.4124650955200195, "loss": 0.6276, "nll_loss": 0.6094456315040588, "rewards/accuracies": 1.0, "rewards/chosen": -0.21449671685695648, "rewards/margins": 0.22674979269504547, "rewards/rejected": -0.44124650955200195, "step": 6147 }, { "epoch": 16.83230663928816, "grad_norm": 13.052896499633789, "learning_rate": 1.5780821917808217e-07, "log_odds_chosen": 2.5504660606384277, "log_odds_ratio": -0.5903303623199463, "logits/chosen": 1.1211340427398682, "logits/rejected": 1.0908457040786743, "logps/chosen": -1.8457441329956055, "logps/rejected": -4.176296234130859, "loss": 0.5712, "nll_loss": 0.5121973752975464, "rewards/accuracies": 0.875, "rewards/chosen": -0.1845744103193283, "rewards/margins": 0.2330552637577057, "rewards/rejected": -0.4176296591758728, "step": 6148 }, { "epoch": 16.83504449007529, "grad_norm": 4.943965911865234, "learning_rate": 1.5767123287671232e-07, "log_odds_chosen": 2.346848487854004, "log_odds_ratio": -0.39410078525543213, "logits/chosen": 1.1821696758270264, "logits/rejected": 1.1594581604003906, "logps/chosen": -3.0345401763916016, "logps/rejected": -5.272886276245117, "loss": 0.6877, "nll_loss": 0.6482537984848022, "rewards/accuracies": 0.75, "rewards/chosen": -0.30345404148101807, "rewards/margins": 0.22383460402488708, "rewards/rejected": -0.5272886157035828, "step": 6149 }, { "epoch": 16.83778234086242, "grad_norm": 6.135022163391113, "learning_rate": 1.5753424657534245e-07, "log_odds_chosen": 2.637838363647461, "log_odds_ratio": -0.4085589349269867, "logits/chosen": 1.2272205352783203, "logits/rejected": 1.1233587265014648, "logps/chosen": -2.3804783821105957, "logps/rejected": -4.888710975646973, "loss": 0.5576, "nll_loss": 0.5167465209960938, "rewards/accuracies": 0.875, "rewards/chosen": -0.23804786801338196, "rewards/margins": 0.2508232593536377, "rewards/rejected": -0.48887109756469727, "step": 6150 }, { "epoch": 16.840520191649556, "grad_norm": 3.8171684741973877, "learning_rate": 1.573972602739726e-07, "log_odds_chosen": 2.8284778594970703, "log_odds_ratio": -0.11682397127151489, "logits/chosen": 1.4039298295974731, "logits/rejected": 1.4283024072647095, "logps/chosen": -1.866166591644287, "logps/rejected": -4.486915588378906, "loss": 0.4575, "nll_loss": 0.4458542764186859, "rewards/accuracies": 1.0, "rewards/chosen": -0.1866166740655899, "rewards/margins": 0.26207488775253296, "rewards/rejected": -0.44869157671928406, "step": 6151 }, { "epoch": 16.843258042436688, "grad_norm": 3.726839542388916, "learning_rate": 1.5726027397260273e-07, "log_odds_chosen": 2.7628114223480225, "log_odds_ratio": -0.21207135915756226, "logits/chosen": 1.2709355354309082, "logits/rejected": 1.1610724925994873, "logps/chosen": -1.4044451713562012, "logps/rejected": -3.9367311000823975, "loss": 0.5428, "nll_loss": 0.5215556025505066, "rewards/accuracies": 1.0, "rewards/chosen": -0.14044451713562012, "rewards/margins": 0.2532285451889038, "rewards/rejected": -0.3936730623245239, "step": 6152 }, { "epoch": 16.84599589322382, "grad_norm": 3.525526523590088, "learning_rate": 1.5712328767123288e-07, "log_odds_chosen": 1.8474783897399902, "log_odds_ratio": -0.20480290055274963, "logits/chosen": 0.9922856688499451, "logits/rejected": 0.8997934460639954, "logps/chosen": -1.4375487565994263, "logps/rejected": -3.0496485233306885, "loss": 0.4022, "nll_loss": 0.38174888491630554, "rewards/accuracies": 1.0, "rewards/chosen": -0.14375486969947815, "rewards/margins": 0.16120997071266174, "rewards/rejected": -0.3049648404121399, "step": 6153 }, { "epoch": 16.84873374401095, "grad_norm": 3.533376932144165, "learning_rate": 1.56986301369863e-07, "log_odds_chosen": 1.3924661874771118, "log_odds_ratio": -0.280724436044693, "logits/chosen": 0.9837203025817871, "logits/rejected": 0.8994245529174805, "logps/chosen": -1.9212703704833984, "logps/rejected": -3.163343906402588, "loss": 0.4703, "nll_loss": 0.44226282835006714, "rewards/accuracies": 1.0, "rewards/chosen": -0.1921270489692688, "rewards/margins": 0.12420734018087387, "rewards/rejected": -0.31633439660072327, "step": 6154 }, { "epoch": 16.851471594798085, "grad_norm": 7.729386329650879, "learning_rate": 1.5684931506849313e-07, "log_odds_chosen": 3.593604564666748, "log_odds_ratio": -0.3543926179409027, "logits/chosen": 1.2077786922454834, "logits/rejected": 1.2344716787338257, "logps/chosen": -3.0356829166412354, "logps/rejected": -6.494716644287109, "loss": 0.6157, "nll_loss": 0.5802512168884277, "rewards/accuracies": 0.875, "rewards/chosen": -0.3035683035850525, "rewards/margins": 0.3459034264087677, "rewards/rejected": -0.6494717001914978, "step": 6155 }, { "epoch": 16.854209445585216, "grad_norm": 4.289057731628418, "learning_rate": 1.5671232876712328e-07, "log_odds_chosen": 4.8935418128967285, "log_odds_ratio": -0.1819988191127777, "logits/chosen": 0.9304271936416626, "logits/rejected": 0.9752888083457947, "logps/chosen": -1.9308749437332153, "logps/rejected": -6.683099746704102, "loss": 0.6112, "nll_loss": 0.5929851531982422, "rewards/accuracies": 1.0, "rewards/chosen": -0.19308748841285706, "rewards/margins": 0.47522249817848206, "rewards/rejected": -0.6683099865913391, "step": 6156 }, { "epoch": 16.856947296372347, "grad_norm": 3.975094795227051, "learning_rate": 1.565753424657534e-07, "log_odds_chosen": 4.138826370239258, "log_odds_ratio": -0.058487407863140106, "logits/chosen": 1.2700533866882324, "logits/rejected": 1.3680566549301147, "logps/chosen": -2.094860076904297, "logps/rejected": -6.0520172119140625, "loss": 0.6244, "nll_loss": 0.6185752749443054, "rewards/accuracies": 1.0, "rewards/chosen": -0.2094860076904297, "rewards/margins": 0.3957156836986542, "rewards/rejected": -0.6052017211914062, "step": 6157 }, { "epoch": 16.859685147159478, "grad_norm": 3.3102452754974365, "learning_rate": 1.5643835616438356e-07, "log_odds_chosen": 2.728865623474121, "log_odds_ratio": -0.15565985441207886, "logits/chosen": 0.883650541305542, "logits/rejected": 0.8597938418388367, "logps/chosen": -1.7343299388885498, "logps/rejected": -4.2542924880981445, "loss": 0.4888, "nll_loss": 0.47322314977645874, "rewards/accuracies": 1.0, "rewards/chosen": -0.17343300580978394, "rewards/margins": 0.2519962787628174, "rewards/rejected": -0.42542925477027893, "step": 6158 }, { "epoch": 16.862422997946613, "grad_norm": 3.6619338989257812, "learning_rate": 1.563013698630137e-07, "log_odds_chosen": 2.144688606262207, "log_odds_ratio": -0.2067878544330597, "logits/chosen": 1.0178357362747192, "logits/rejected": 0.9536710977554321, "logps/chosen": -1.5292973518371582, "logps/rejected": -3.4323716163635254, "loss": 0.4448, "nll_loss": 0.42416852712631226, "rewards/accuracies": 1.0, "rewards/chosen": -0.15292972326278687, "rewards/margins": 0.19030743837356567, "rewards/rejected": -0.34323716163635254, "step": 6159 }, { "epoch": 16.865160848733744, "grad_norm": 3.741909980773926, "learning_rate": 1.5616438356164384e-07, "log_odds_chosen": 3.7009642124176025, "log_odds_ratio": -0.1704653799533844, "logits/chosen": 1.243422269821167, "logits/rejected": 1.2134373188018799, "logps/chosen": -1.432973027229309, "logps/rejected": -4.827671527862549, "loss": 0.4813, "nll_loss": 0.46428412199020386, "rewards/accuracies": 1.0, "rewards/chosen": -0.14329729974269867, "rewards/margins": 0.339469850063324, "rewards/rejected": -0.48276716470718384, "step": 6160 }, { "epoch": 16.867898699520875, "grad_norm": 9.750069618225098, "learning_rate": 1.5602739726027396e-07, "log_odds_chosen": 1.0832626819610596, "log_odds_ratio": -0.7883790731430054, "logits/chosen": 1.0290526151657104, "logits/rejected": 1.051133632659912, "logps/chosen": -3.2647812366485596, "logps/rejected": -4.279650688171387, "loss": 0.7096, "nll_loss": 0.6307438015937805, "rewards/accuracies": 0.875, "rewards/chosen": -0.32647809386253357, "rewards/margins": 0.10148695111274719, "rewards/rejected": -0.42796504497528076, "step": 6161 }, { "epoch": 16.870636550308006, "grad_norm": 3.8672826290130615, "learning_rate": 1.558904109589041e-07, "log_odds_chosen": 1.8441226482391357, "log_odds_ratio": -0.20311081409454346, "logits/chosen": 0.8824082016944885, "logits/rejected": 0.7590759992599487, "logps/chosen": -1.646870493888855, "logps/rejected": -3.3173201084136963, "loss": 0.5619, "nll_loss": 0.5416044592857361, "rewards/accuracies": 1.0, "rewards/chosen": -0.16468705236911774, "rewards/margins": 0.16704495251178741, "rewards/rejected": -0.33173200488090515, "step": 6162 }, { "epoch": 16.87337440109514, "grad_norm": 3.6002297401428223, "learning_rate": 1.5575342465753424e-07, "log_odds_chosen": 2.5048158168792725, "log_odds_ratio": -0.28450196981430054, "logits/chosen": 1.1674543619155884, "logits/rejected": 1.1145076751708984, "logps/chosen": -1.4575183391571045, "logps/rejected": -3.7499475479125977, "loss": 0.5959, "nll_loss": 0.5674394965171814, "rewards/accuracies": 0.875, "rewards/chosen": -0.14575183391571045, "rewards/margins": 0.22924292087554932, "rewards/rejected": -0.37499475479125977, "step": 6163 }, { "epoch": 16.876112251882272, "grad_norm": 3.135653257369995, "learning_rate": 1.5561643835616437e-07, "log_odds_chosen": 3.9757134914398193, "log_odds_ratio": -0.13549628853797913, "logits/chosen": 1.0989995002746582, "logits/rejected": 1.0996475219726562, "logps/chosen": -1.377855658531189, "logps/rejected": -5.057117938995361, "loss": 0.4622, "nll_loss": 0.44866886734962463, "rewards/accuracies": 1.0, "rewards/chosen": -0.13778558373451233, "rewards/margins": 0.3679261803627014, "rewards/rejected": -0.5057117938995361, "step": 6164 }, { "epoch": 16.878850102669404, "grad_norm": 7.65541410446167, "learning_rate": 1.5547945205479452e-07, "log_odds_chosen": 4.779942035675049, "log_odds_ratio": -0.09768351167440414, "logits/chosen": 1.163232445716858, "logits/rejected": 1.1710346937179565, "logps/chosen": -2.0300867557525635, "logps/rejected": -6.592777729034424, "loss": 0.5614, "nll_loss": 0.5516679286956787, "rewards/accuracies": 1.0, "rewards/chosen": -0.20300866663455963, "rewards/margins": 0.4562690854072571, "rewards/rejected": -0.6592777967453003, "step": 6165 }, { "epoch": 16.88158795345654, "grad_norm": 9.931272506713867, "learning_rate": 1.5534246575342467e-07, "log_odds_chosen": 3.0015406608581543, "log_odds_ratio": -0.18081983923912048, "logits/chosen": 1.0687559843063354, "logits/rejected": 1.1130367517471313, "logps/chosen": -2.4338202476501465, "logps/rejected": -5.219722747802734, "loss": 0.5801, "nll_loss": 0.5620341300964355, "rewards/accuracies": 1.0, "rewards/chosen": -0.2433820217847824, "rewards/margins": 0.27859026193618774, "rewards/rejected": -0.5219722986221313, "step": 6166 }, { "epoch": 16.88432580424367, "grad_norm": 3.3024749755859375, "learning_rate": 1.5520547945205477e-07, "log_odds_chosen": 4.275143623352051, "log_odds_ratio": -0.10691492259502411, "logits/chosen": 0.9979937076568604, "logits/rejected": 0.928554892539978, "logps/chosen": -1.8334555625915527, "logps/rejected": -5.896254062652588, "loss": 0.479, "nll_loss": 0.4683569371700287, "rewards/accuracies": 1.0, "rewards/chosen": -0.18334555625915527, "rewards/margins": 0.40627989172935486, "rewards/rejected": -0.5896254181861877, "step": 6167 }, { "epoch": 16.8870636550308, "grad_norm": 3.4690134525299072, "learning_rate": 1.5506849315068492e-07, "log_odds_chosen": 2.648393392562866, "log_odds_ratio": -0.17028316855430603, "logits/chosen": 1.401416540145874, "logits/rejected": 1.4316694736480713, "logps/chosen": -2.014667272567749, "logps/rejected": -4.499943733215332, "loss": 0.4792, "nll_loss": 0.4621904194355011, "rewards/accuracies": 1.0, "rewards/chosen": -0.20146673917770386, "rewards/margins": 0.2485276609659195, "rewards/rejected": -0.44999438524246216, "step": 6168 }, { "epoch": 16.889801505817932, "grad_norm": 8.153810501098633, "learning_rate": 1.5493150684931505e-07, "log_odds_chosen": 2.606407880783081, "log_odds_ratio": -0.4909164607524872, "logits/chosen": 1.2756178379058838, "logits/rejected": 1.2040221691131592, "logps/chosen": -2.736382246017456, "logps/rejected": -5.193212985992432, "loss": 0.5899, "nll_loss": 0.5407658815383911, "rewards/accuracies": 0.875, "rewards/chosen": -0.2736382484436035, "rewards/margins": 0.24568308889865875, "rewards/rejected": -0.5193213224411011, "step": 6169 }, { "epoch": 16.892539356605067, "grad_norm": 3.809941530227661, "learning_rate": 1.547945205479452e-07, "log_odds_chosen": 2.9457201957702637, "log_odds_ratio": -0.19803643226623535, "logits/chosen": 1.2628135681152344, "logits/rejected": 1.2408223152160645, "logps/chosen": -1.639268398284912, "logps/rejected": -4.366725921630859, "loss": 0.5107, "nll_loss": 0.49086880683898926, "rewards/accuracies": 0.875, "rewards/chosen": -0.1639268398284912, "rewards/margins": 0.2727457284927368, "rewards/rejected": -0.4366725981235504, "step": 6170 }, { "epoch": 16.895277207392198, "grad_norm": 3.4387753009796143, "learning_rate": 1.5465753424657533e-07, "log_odds_chosen": 2.682086706161499, "log_odds_ratio": -0.1711675077676773, "logits/chosen": 1.1617485284805298, "logits/rejected": 1.1736338138580322, "logps/chosen": -1.75822114944458, "logps/rejected": -4.204138278961182, "loss": 0.4627, "nll_loss": 0.4456259310245514, "rewards/accuracies": 1.0, "rewards/chosen": -0.17582212388515472, "rewards/margins": 0.24459171295166016, "rewards/rejected": -0.4204138517379761, "step": 6171 }, { "epoch": 16.89801505817933, "grad_norm": 5.66695499420166, "learning_rate": 1.5452054794520548e-07, "log_odds_chosen": 1.426761269569397, "log_odds_ratio": -0.31050097942352295, "logits/chosen": 1.0885976552963257, "logits/rejected": 1.0122809410095215, "logps/chosen": -2.5387473106384277, "logps/rejected": -3.8601207733154297, "loss": 0.5564, "nll_loss": 0.5253534317016602, "rewards/accuracies": 0.875, "rewards/chosen": -0.2538747191429138, "rewards/margins": 0.13213732838630676, "rewards/rejected": -0.3860120475292206, "step": 6172 }, { "epoch": 16.90075290896646, "grad_norm": 3.282182216644287, "learning_rate": 1.5438356164383563e-07, "log_odds_chosen": 3.7314629554748535, "log_odds_ratio": -0.10423049330711365, "logits/chosen": 1.0334848165512085, "logits/rejected": 1.0523093938827515, "logps/chosen": -1.8276832103729248, "logps/rejected": -5.237190246582031, "loss": 0.5299, "nll_loss": 0.5194536447525024, "rewards/accuracies": 1.0, "rewards/chosen": -0.182768315076828, "rewards/margins": 0.34095069766044617, "rewards/rejected": -0.523719072341919, "step": 6173 }, { "epoch": 16.903490759753595, "grad_norm": 6.551965236663818, "learning_rate": 1.5424657534246573e-07, "log_odds_chosen": 1.7141668796539307, "log_odds_ratio": -0.36180657148361206, "logits/chosen": 1.0718531608581543, "logits/rejected": 1.087308406829834, "logps/chosen": -2.6744184494018555, "logps/rejected": -4.274173736572266, "loss": 0.7643, "nll_loss": 0.7281399965286255, "rewards/accuracies": 0.625, "rewards/chosen": -0.26744183897972107, "rewards/margins": 0.1599755436182022, "rewards/rejected": -0.4274173974990845, "step": 6174 }, { "epoch": 16.906228610540726, "grad_norm": 3.37150502204895, "learning_rate": 1.5410958904109588e-07, "log_odds_chosen": 2.9726057052612305, "log_odds_ratio": -0.27969780564308167, "logits/chosen": 1.215474009513855, "logits/rejected": 1.1405384540557861, "logps/chosen": -1.3444300889968872, "logps/rejected": -4.086348056793213, "loss": 0.4256, "nll_loss": 0.3976755738258362, "rewards/accuracies": 1.0, "rewards/chosen": -0.1344430148601532, "rewards/margins": 0.27419179677963257, "rewards/rejected": -0.4086347818374634, "step": 6175 }, { "epoch": 16.908966461327857, "grad_norm": 3.8839352130889893, "learning_rate": 1.53972602739726e-07, "log_odds_chosen": 1.7907049655914307, "log_odds_ratio": -0.18998906016349792, "logits/chosen": 1.2801069021224976, "logits/rejected": 1.2303467988967896, "logps/chosen": -1.720099925994873, "logps/rejected": -3.3074605464935303, "loss": 0.4643, "nll_loss": 0.4452754259109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.17201000452041626, "rewards/margins": 0.15873603522777557, "rewards/rejected": -0.330746054649353, "step": 6176 }, { "epoch": 16.91170431211499, "grad_norm": 3.4778831005096436, "learning_rate": 1.5383561643835616e-07, "log_odds_chosen": 3.005387544631958, "log_odds_ratio": -0.1497860997915268, "logits/chosen": 0.9409693479537964, "logits/rejected": 0.9807342886924744, "logps/chosen": -1.8534514904022217, "logps/rejected": -4.637750625610352, "loss": 0.5826, "nll_loss": 0.5676153302192688, "rewards/accuracies": 1.0, "rewards/chosen": -0.1853451430797577, "rewards/margins": 0.27842992544174194, "rewards/rejected": -0.463775098323822, "step": 6177 }, { "epoch": 16.914442162902123, "grad_norm": 4.099733829498291, "learning_rate": 1.536986301369863e-07, "log_odds_chosen": 1.9157946109771729, "log_odds_ratio": -0.18766134977340698, "logits/chosen": 1.1237612962722778, "logits/rejected": 1.1280180215835571, "logps/chosen": -1.4327579736709595, "logps/rejected": -2.962162494659424, "loss": 0.3962, "nll_loss": 0.3774753510951996, "rewards/accuracies": 1.0, "rewards/chosen": -0.14327579736709595, "rewards/margins": 0.15294045209884644, "rewards/rejected": -0.2962162494659424, "step": 6178 }, { "epoch": 16.917180013689254, "grad_norm": 3.51233172416687, "learning_rate": 1.5356164383561644e-07, "log_odds_chosen": 3.3462228775024414, "log_odds_ratio": -0.0703355148434639, "logits/chosen": 1.065544605255127, "logits/rejected": 1.0439988374710083, "logps/chosen": -2.045769453048706, "logps/rejected": -5.200879096984863, "loss": 0.5099, "nll_loss": 0.5028345584869385, "rewards/accuracies": 1.0, "rewards/chosen": -0.20457693934440613, "rewards/margins": 0.315511018037796, "rewards/rejected": -0.5200879573822021, "step": 6179 }, { "epoch": 16.919917864476385, "grad_norm": 3.5535097122192383, "learning_rate": 1.534246575342466e-07, "log_odds_chosen": 2.3751749992370605, "log_odds_ratio": -0.2660127878189087, "logits/chosen": 1.0841269493103027, "logits/rejected": 1.052996039390564, "logps/chosen": -1.5590425729751587, "logps/rejected": -3.5915307998657227, "loss": 0.4203, "nll_loss": 0.3936537206172943, "rewards/accuracies": 0.75, "rewards/chosen": -0.15590426325798035, "rewards/margins": 0.20324882864952087, "rewards/rejected": -0.3591530919075012, "step": 6180 }, { "epoch": 16.922655715263517, "grad_norm": 3.7665927410125732, "learning_rate": 1.532876712328767e-07, "log_odds_chosen": 2.2778871059417725, "log_odds_ratio": -0.33613869547843933, "logits/chosen": 1.274017333984375, "logits/rejected": 1.292374610900879, "logps/chosen": -1.8061227798461914, "logps/rejected": -3.8810276985168457, "loss": 0.5657, "nll_loss": 0.5321294665336609, "rewards/accuracies": 0.875, "rewards/chosen": -0.18061229586601257, "rewards/margins": 0.2074904590845108, "rewards/rejected": -0.38810276985168457, "step": 6181 }, { "epoch": 16.92539356605065, "grad_norm": 3.448275327682495, "learning_rate": 1.5315068493150684e-07, "log_odds_chosen": 3.522571563720703, "log_odds_ratio": -0.18322911858558655, "logits/chosen": 1.0680367946624756, "logits/rejected": 1.0622963905334473, "logps/chosen": -1.795650601387024, "logps/rejected": -5.009253025054932, "loss": 0.4916, "nll_loss": 0.4733145236968994, "rewards/accuracies": 1.0, "rewards/chosen": -0.17956505715847015, "rewards/margins": 0.3213602304458618, "rewards/rejected": -0.5009253025054932, "step": 6182 }, { "epoch": 16.928131416837783, "grad_norm": 4.422354698181152, "learning_rate": 1.5301369863013697e-07, "log_odds_chosen": 2.164168119430542, "log_odds_ratio": -0.2609098553657532, "logits/chosen": 1.150843620300293, "logits/rejected": 1.2351665496826172, "logps/chosen": -2.6019372940063477, "logps/rejected": -4.703354358673096, "loss": 0.6043, "nll_loss": 0.5781735181808472, "rewards/accuracies": 0.875, "rewards/chosen": -0.26019370555877686, "rewards/margins": 0.21014170348644257, "rewards/rejected": -0.4703354239463806, "step": 6183 }, { "epoch": 16.930869267624914, "grad_norm": 3.679710626602173, "learning_rate": 1.5287671232876712e-07, "log_odds_chosen": 2.3487091064453125, "log_odds_ratio": -0.1696704477071762, "logits/chosen": 1.209641933441162, "logits/rejected": 1.0853848457336426, "logps/chosen": -1.1485929489135742, "logps/rejected": -3.150512218475342, "loss": 0.3862, "nll_loss": 0.3692631721496582, "rewards/accuracies": 1.0, "rewards/chosen": -0.11485929787158966, "rewards/margins": 0.200191929936409, "rewards/rejected": -0.31505125761032104, "step": 6184 }, { "epoch": 16.933607118412045, "grad_norm": 4.931205749511719, "learning_rate": 1.5273972602739727e-07, "log_odds_chosen": 1.8269473314285278, "log_odds_ratio": -0.44589024782180786, "logits/chosen": 0.8298735022544861, "logits/rejected": 0.7416031360626221, "logps/chosen": -1.763040542602539, "logps/rejected": -3.398386001586914, "loss": 0.6782, "nll_loss": 0.6336410045623779, "rewards/accuracies": 0.75, "rewards/chosen": -0.17630405724048615, "rewards/margins": 0.16353458166122437, "rewards/rejected": -0.3398386240005493, "step": 6185 }, { "epoch": 16.93634496919918, "grad_norm": 5.458935260772705, "learning_rate": 1.526027397260274e-07, "log_odds_chosen": 1.4252393245697021, "log_odds_ratio": -0.7472193241119385, "logits/chosen": 1.0525237321853638, "logits/rejected": 1.1171423196792603, "logps/chosen": -2.07975697517395, "logps/rejected": -3.295757293701172, "loss": 0.6073, "nll_loss": 0.5325570106506348, "rewards/accuracies": 0.625, "rewards/chosen": -0.20797571539878845, "rewards/margins": 0.12160002440214157, "rewards/rejected": -0.3295757472515106, "step": 6186 }, { "epoch": 16.93908281998631, "grad_norm": 3.273892879486084, "learning_rate": 1.5246575342465755e-07, "log_odds_chosen": 4.846492290496826, "log_odds_ratio": -0.07070494443178177, "logits/chosen": 1.0514088869094849, "logits/rejected": 1.0524117946624756, "logps/chosen": -1.6082795858383179, "logps/rejected": -6.21630859375, "loss": 0.5346, "nll_loss": 0.5275136828422546, "rewards/accuracies": 1.0, "rewards/chosen": -0.16082796454429626, "rewards/margins": 0.4608028531074524, "rewards/rejected": -0.621630847454071, "step": 6187 }, { "epoch": 16.941820670773442, "grad_norm": 3.8718786239624023, "learning_rate": 1.5232876712328765e-07, "log_odds_chosen": 2.3608052730560303, "log_odds_ratio": -0.16184626519680023, "logits/chosen": 0.8402193784713745, "logits/rejected": 0.7578957080841064, "logps/chosen": -1.8957936763763428, "logps/rejected": -4.059157371520996, "loss": 0.5045, "nll_loss": 0.48834267258644104, "rewards/accuracies": 1.0, "rewards/chosen": -0.18957938253879547, "rewards/margins": 0.21633639931678772, "rewards/rejected": -0.405915766954422, "step": 6188 }, { "epoch": 16.944558521560573, "grad_norm": 3.392889976501465, "learning_rate": 1.521917808219178e-07, "log_odds_chosen": 4.292042255401611, "log_odds_ratio": -0.1639077365398407, "logits/chosen": 0.8623080253601074, "logits/rejected": 0.878761887550354, "logps/chosen": -1.5196677446365356, "logps/rejected": -5.6235857009887695, "loss": 0.5339, "nll_loss": 0.5175467133522034, "rewards/accuracies": 1.0, "rewards/chosen": -0.15196676552295685, "rewards/margins": 0.41039183735847473, "rewards/rejected": -0.5623586177825928, "step": 6189 }, { "epoch": 16.947296372347708, "grad_norm": 4.15440034866333, "learning_rate": 1.5205479452054795e-07, "log_odds_chosen": 2.3770103454589844, "log_odds_ratio": -0.21721887588500977, "logits/chosen": 0.7945650815963745, "logits/rejected": 0.7198195457458496, "logps/chosen": -1.5411076545715332, "logps/rejected": -3.740355968475342, "loss": 0.4749, "nll_loss": 0.45315223932266235, "rewards/accuracies": 1.0, "rewards/chosen": -0.15411077439785004, "rewards/margins": 0.21992482244968414, "rewards/rejected": -0.37403562664985657, "step": 6190 }, { "epoch": 16.95003422313484, "grad_norm": 5.603762626647949, "learning_rate": 1.5191780821917808e-07, "log_odds_chosen": 2.6605801582336426, "log_odds_ratio": -0.1412104219198227, "logits/chosen": 1.210157871246338, "logits/rejected": 1.2189595699310303, "logps/chosen": -2.021585464477539, "logps/rejected": -4.487403869628906, "loss": 0.556, "nll_loss": 0.5418993234634399, "rewards/accuracies": 1.0, "rewards/chosen": -0.20215855538845062, "rewards/margins": 0.24658185243606567, "rewards/rejected": -0.4487404227256775, "step": 6191 }, { "epoch": 16.95277207392197, "grad_norm": 3.8355205059051514, "learning_rate": 1.5178082191780823e-07, "log_odds_chosen": 3.131478786468506, "log_odds_ratio": -0.15163439512252808, "logits/chosen": 1.1300668716430664, "logits/rejected": 1.169372797012329, "logps/chosen": -1.897390604019165, "logps/rejected": -4.822523593902588, "loss": 0.4837, "nll_loss": 0.4684930741786957, "rewards/accuracies": 1.0, "rewards/chosen": -0.18973907828330994, "rewards/margins": 0.29251328110694885, "rewards/rejected": -0.4822523891925812, "step": 6192 }, { "epoch": 16.955509924709105, "grad_norm": 3.565765142440796, "learning_rate": 1.5164383561643835e-07, "log_odds_chosen": 3.3728513717651367, "log_odds_ratio": -0.14945107698440552, "logits/chosen": 1.1913689374923706, "logits/rejected": 1.1131553649902344, "logps/chosen": -1.8551063537597656, "logps/rejected": -5.053891181945801, "loss": 0.6257, "nll_loss": 0.6107598543167114, "rewards/accuracies": 1.0, "rewards/chosen": -0.18551065027713776, "rewards/margins": 0.3198785185813904, "rewards/rejected": -0.5053891539573669, "step": 6193 }, { "epoch": 16.958247775496236, "grad_norm": 3.623453140258789, "learning_rate": 1.5150684931506848e-07, "log_odds_chosen": 2.491567611694336, "log_odds_ratio": -0.16962432861328125, "logits/chosen": 1.2180099487304688, "logits/rejected": 1.1483747959136963, "logps/chosen": -1.8976802825927734, "logps/rejected": -4.256570816040039, "loss": 0.5767, "nll_loss": 0.5597627758979797, "rewards/accuracies": 1.0, "rewards/chosen": -0.18976803123950958, "rewards/margins": 0.2358890324831009, "rewards/rejected": -0.4256570637226105, "step": 6194 }, { "epoch": 16.960985626283367, "grad_norm": 3.493690252304077, "learning_rate": 1.513698630136986e-07, "log_odds_chosen": 3.6949048042297363, "log_odds_ratio": -0.15880216658115387, "logits/chosen": 1.0542891025543213, "logits/rejected": 0.969336211681366, "logps/chosen": -2.1372509002685547, "logps/rejected": -5.650561809539795, "loss": 0.5139, "nll_loss": 0.49805667996406555, "rewards/accuracies": 1.0, "rewards/chosen": -0.21372510492801666, "rewards/margins": 0.35133108496665955, "rewards/rejected": -0.5650561451911926, "step": 6195 }, { "epoch": 16.9637234770705, "grad_norm": 3.230088710784912, "learning_rate": 1.5123287671232876e-07, "log_odds_chosen": 3.0567660331726074, "log_odds_ratio": -0.10882733762264252, "logits/chosen": 1.3641115427017212, "logits/rejected": 1.3821550607681274, "logps/chosen": -1.5447354316711426, "logps/rejected": -4.345267295837402, "loss": 0.4684, "nll_loss": 0.45754456520080566, "rewards/accuracies": 1.0, "rewards/chosen": -0.15447354316711426, "rewards/margins": 0.2800532281398773, "rewards/rejected": -0.4345267415046692, "step": 6196 }, { "epoch": 16.966461327857633, "grad_norm": 3.5931594371795654, "learning_rate": 1.510958904109589e-07, "log_odds_chosen": 3.571772336959839, "log_odds_ratio": -0.1964539736509323, "logits/chosen": 1.1066272258758545, "logits/rejected": 0.966590940952301, "logps/chosen": -2.3510193824768066, "logps/rejected": -5.805506229400635, "loss": 0.6188, "nll_loss": 0.5991269946098328, "rewards/accuracies": 1.0, "rewards/chosen": -0.23510193824768066, "rewards/margins": 0.34544867277145386, "rewards/rejected": -0.5805506110191345, "step": 6197 }, { "epoch": 16.969199178644764, "grad_norm": 6.31477165222168, "learning_rate": 1.5095890410958903e-07, "log_odds_chosen": 3.247035264968872, "log_odds_ratio": -0.359437495470047, "logits/chosen": 1.0125653743743896, "logits/rejected": 0.9390720129013062, "logps/chosen": -1.9680988788604736, "logps/rejected": -4.9273881912231445, "loss": 0.5019, "nll_loss": 0.4659292995929718, "rewards/accuracies": 0.75, "rewards/chosen": -0.19680988788604736, "rewards/margins": 0.295928955078125, "rewards/rejected": -0.49273884296417236, "step": 6198 }, { "epoch": 16.971937029431896, "grad_norm": 3.295076608657837, "learning_rate": 1.5082191780821919e-07, "log_odds_chosen": 2.705334186553955, "log_odds_ratio": -0.14453016221523285, "logits/chosen": 1.2625386714935303, "logits/rejected": 1.2668461799621582, "logps/chosen": -1.474853754043579, "logps/rejected": -3.9199695587158203, "loss": 0.4531, "nll_loss": 0.43864452838897705, "rewards/accuracies": 1.0, "rewards/chosen": -0.1474853754043579, "rewards/margins": 0.24451160430908203, "rewards/rejected": -0.39199697971343994, "step": 6199 }, { "epoch": 16.974674880219027, "grad_norm": 3.5785088539123535, "learning_rate": 1.506849315068493e-07, "log_odds_chosen": 3.7985658645629883, "log_odds_ratio": -0.17851179838180542, "logits/chosen": 1.0827800035476685, "logits/rejected": 1.095057725906372, "logps/chosen": -2.1405344009399414, "logps/rejected": -5.792169570922852, "loss": 0.5258, "nll_loss": 0.5079300403594971, "rewards/accuracies": 1.0, "rewards/chosen": -0.2140534371137619, "rewards/margins": 0.36516350507736206, "rewards/rejected": -0.5792169570922852, "step": 6200 }, { "epoch": 16.97741273100616, "grad_norm": 3.7218449115753174, "learning_rate": 1.5054794520547944e-07, "log_odds_chosen": 1.6063300371170044, "log_odds_ratio": -0.28275731205940247, "logits/chosen": 1.2991313934326172, "logits/rejected": 1.2477450370788574, "logps/chosen": -1.25187087059021, "logps/rejected": -2.5251259803771973, "loss": 0.4853, "nll_loss": 0.4570046663284302, "rewards/accuracies": 0.875, "rewards/chosen": -0.12518709897994995, "rewards/margins": 0.12732550501823425, "rewards/rejected": -0.2525126039981842, "step": 6201 }, { "epoch": 16.980150581793293, "grad_norm": 4.068034648895264, "learning_rate": 1.5041095890410956e-07, "log_odds_chosen": 1.7480382919311523, "log_odds_ratio": -0.21102005243301392, "logits/chosen": 1.2789843082427979, "logits/rejected": 1.2682371139526367, "logps/chosen": -1.6187032461166382, "logps/rejected": -3.192598819732666, "loss": 0.4522, "nll_loss": 0.4311351180076599, "rewards/accuracies": 1.0, "rewards/chosen": -0.1618703305721283, "rewards/margins": 0.1573895514011383, "rewards/rejected": -0.3192598819732666, "step": 6202 }, { "epoch": 16.982888432580424, "grad_norm": 6.598416328430176, "learning_rate": 1.5027397260273972e-07, "log_odds_chosen": 3.1019937992095947, "log_odds_ratio": -0.31426042318344116, "logits/chosen": 1.1253541707992554, "logits/rejected": 1.025960087776184, "logps/chosen": -1.854308843612671, "logps/rejected": -4.670721054077148, "loss": 0.5909, "nll_loss": 0.5594240427017212, "rewards/accuracies": 0.875, "rewards/chosen": -0.1854308843612671, "rewards/margins": 0.28164127469062805, "rewards/rejected": -0.46707212924957275, "step": 6203 }, { "epoch": 16.985626283367555, "grad_norm": 5.9447174072265625, "learning_rate": 1.5013698630136987e-07, "log_odds_chosen": 3.0197482109069824, "log_odds_ratio": -0.20400455594062805, "logits/chosen": 1.1424691677093506, "logits/rejected": 1.1348060369491577, "logps/chosen": -2.403679847717285, "logps/rejected": -5.2676897048950195, "loss": 0.5868, "nll_loss": 0.5664364099502563, "rewards/accuracies": 1.0, "rewards/chosen": -0.24036797881126404, "rewards/margins": 0.28640100359916687, "rewards/rejected": -0.5267689228057861, "step": 6204 }, { "epoch": 16.98836413415469, "grad_norm": 4.320192337036133, "learning_rate": 1.5e-07, "log_odds_chosen": 1.839953064918518, "log_odds_ratio": -0.16854260861873627, "logits/chosen": 0.9008601307868958, "logits/rejected": 0.7955867052078247, "logps/chosen": -1.5723141431808472, "logps/rejected": -3.1568922996520996, "loss": 0.4647, "nll_loss": 0.4478603005409241, "rewards/accuracies": 1.0, "rewards/chosen": -0.1572314202785492, "rewards/margins": 0.15845781564712524, "rewards/rejected": -0.31568923592567444, "step": 6205 }, { "epoch": 16.99110198494182, "grad_norm": 3.6343436241149902, "learning_rate": 1.4986301369863015e-07, "log_odds_chosen": 2.347463846206665, "log_odds_ratio": -0.2191648781299591, "logits/chosen": 1.068009614944458, "logits/rejected": 0.9856637716293335, "logps/chosen": -1.1278457641601562, "logps/rejected": -3.19228458404541, "loss": 0.4202, "nll_loss": 0.3982934355735779, "rewards/accuracies": 1.0, "rewards/chosen": -0.11278457939624786, "rewards/margins": 0.2064438760280609, "rewards/rejected": -0.31922847032546997, "step": 6206 }, { "epoch": 16.993839835728952, "grad_norm": 3.6293649673461914, "learning_rate": 1.4972602739726024e-07, "log_odds_chosen": 1.9286693334579468, "log_odds_ratio": -0.2614736258983612, "logits/chosen": 1.1808147430419922, "logits/rejected": 1.1662803888320923, "logps/chosen": -1.5264804363250732, "logps/rejected": -3.285951852798462, "loss": 0.5347, "nll_loss": 0.5085998177528381, "rewards/accuracies": 0.875, "rewards/chosen": -0.15264803171157837, "rewards/margins": 0.1759471595287323, "rewards/rejected": -0.32859519124031067, "step": 6207 }, { "epoch": 16.996577686516083, "grad_norm": 6.342287063598633, "learning_rate": 1.495890410958904e-07, "log_odds_chosen": 1.740586280822754, "log_odds_ratio": -0.40664297342300415, "logits/chosen": 1.0070021152496338, "logits/rejected": 1.0335487127304077, "logps/chosen": -2.61904239654541, "logps/rejected": -4.252950191497803, "loss": 0.7533, "nll_loss": 0.7126646041870117, "rewards/accuracies": 0.75, "rewards/chosen": -0.261904239654541, "rewards/margins": 0.16339077055454254, "rewards/rejected": -0.42529502511024475, "step": 6208 }, { "epoch": 16.999315537303218, "grad_norm": 5.4564290046691895, "learning_rate": 1.4945205479452055e-07, "log_odds_chosen": 1.4721373319625854, "log_odds_ratio": -0.33898356556892395, "logits/chosen": 1.20406174659729, "logits/rejected": 1.1681768894195557, "logps/chosen": -1.907698392868042, "logps/rejected": -3.2821691036224365, "loss": 0.5447, "nll_loss": 0.5108194947242737, "rewards/accuracies": 1.0, "rewards/chosen": -0.19076985120773315, "rewards/margins": 0.1374470740556717, "rewards/rejected": -0.32821691036224365, "step": 6209 }, { "epoch": 17.00205338809035, "grad_norm": 3.822697162628174, "learning_rate": 1.4931506849315067e-07, "log_odds_chosen": 3.227052927017212, "log_odds_ratio": -0.23243890702724457, "logits/chosen": 1.103277325630188, "logits/rejected": 1.1758657693862915, "logps/chosen": -2.1105289459228516, "logps/rejected": -5.245001792907715, "loss": 0.6351, "nll_loss": 0.6118512153625488, "rewards/accuracies": 0.875, "rewards/chosen": -0.21105287969112396, "rewards/margins": 0.31344732642173767, "rewards/rejected": -0.5245001912117004, "step": 6210 }, { "epoch": 17.00479123887748, "grad_norm": 3.517490863800049, "learning_rate": 1.4917808219178083e-07, "log_odds_chosen": 2.86653995513916, "log_odds_ratio": -0.18782548606395721, "logits/chosen": 1.1840908527374268, "logits/rejected": 1.2697210311889648, "logps/chosen": -1.4050489664077759, "logps/rejected": -3.9924724102020264, "loss": 0.4682, "nll_loss": 0.4494319260120392, "rewards/accuracies": 0.875, "rewards/chosen": -0.14050491154193878, "rewards/margins": 0.2587423324584961, "rewards/rejected": -0.39924728870391846, "step": 6211 }, { "epoch": 17.00752908966461, "grad_norm": 3.8699514865875244, "learning_rate": 1.4904109589041095e-07, "log_odds_chosen": 3.6568894386291504, "log_odds_ratio": -0.3011816143989563, "logits/chosen": 1.0571081638336182, "logits/rejected": 1.1775304079055786, "logps/chosen": -2.092775821685791, "logps/rejected": -5.6224517822265625, "loss": 0.6475, "nll_loss": 0.6174086332321167, "rewards/accuracies": 0.875, "rewards/chosen": -0.20927760004997253, "rewards/margins": 0.3529675602912903, "rewards/rejected": -0.5622451901435852, "step": 6212 }, { "epoch": 17.010266940451746, "grad_norm": 3.6802659034729004, "learning_rate": 1.489041095890411e-07, "log_odds_chosen": 3.536831855773926, "log_odds_ratio": -0.09259416162967682, "logits/chosen": 0.9530190825462341, "logits/rejected": 0.9768018126487732, "logps/chosen": -2.7958781719207764, "logps/rejected": -6.237499237060547, "loss": 0.7941, "nll_loss": 0.7848120927810669, "rewards/accuracies": 1.0, "rewards/chosen": -0.2795878052711487, "rewards/margins": 0.34416213631629944, "rewards/rejected": -0.6237499117851257, "step": 6213 }, { "epoch": 17.013004791238878, "grad_norm": 3.4093470573425293, "learning_rate": 1.487671232876712e-07, "log_odds_chosen": 2.9634833335876465, "log_odds_ratio": -0.21320611238479614, "logits/chosen": 0.9653656482696533, "logits/rejected": 0.916645348072052, "logps/chosen": -1.2856910228729248, "logps/rejected": -3.877605438232422, "loss": 0.4304, "nll_loss": 0.4090985655784607, "rewards/accuracies": 0.875, "rewards/chosen": -0.1285691112279892, "rewards/margins": 0.2591914236545563, "rewards/rejected": -0.38776054978370667, "step": 6214 }, { "epoch": 17.01574264202601, "grad_norm": 3.641331672668457, "learning_rate": 1.4863013698630136e-07, "log_odds_chosen": 2.19820237159729, "log_odds_ratio": -0.24200491607189178, "logits/chosen": 1.1424092054367065, "logits/rejected": 1.08577561378479, "logps/chosen": -1.2337714433670044, "logps/rejected": -3.1815407276153564, "loss": 0.4038, "nll_loss": 0.3795711100101471, "rewards/accuracies": 1.0, "rewards/chosen": -0.12337715178728104, "rewards/margins": 0.19477692246437073, "rewards/rejected": -0.31815406680107117, "step": 6215 }, { "epoch": 17.018480492813143, "grad_norm": 10.054727554321289, "learning_rate": 1.484931506849315e-07, "log_odds_chosen": 1.6826258897781372, "log_odds_ratio": -0.3781817853450775, "logits/chosen": 1.4693279266357422, "logits/rejected": 1.4595656394958496, "logps/chosen": -2.008559465408325, "logps/rejected": -3.5568087100982666, "loss": 0.5047, "nll_loss": 0.46689313650131226, "rewards/accuracies": 0.75, "rewards/chosen": -0.20085595548152924, "rewards/margins": 0.15482491254806519, "rewards/rejected": -0.3556808829307556, "step": 6216 }, { "epoch": 17.021218343600275, "grad_norm": 4.618656158447266, "learning_rate": 1.4835616438356163e-07, "log_odds_chosen": 3.2871434688568115, "log_odds_ratio": -0.07829712331295013, "logits/chosen": 1.4961491823196411, "logits/rejected": 1.5300018787384033, "logps/chosen": -1.9955350160598755, "logps/rejected": -5.017923355102539, "loss": 0.5087, "nll_loss": 0.5008953213691711, "rewards/accuracies": 1.0, "rewards/chosen": -0.1995535045862198, "rewards/margins": 0.3022388517856598, "rewards/rejected": -0.501792311668396, "step": 6217 }, { "epoch": 17.023956194387406, "grad_norm": 6.883768081665039, "learning_rate": 1.4821917808219178e-07, "log_odds_chosen": 4.303812026977539, "log_odds_ratio": -0.12296091765165329, "logits/chosen": 1.4718594551086426, "logits/rejected": 1.4505409002304077, "logps/chosen": -1.9321280717849731, "logps/rejected": -6.003093719482422, "loss": 0.6214, "nll_loss": 0.6090701818466187, "rewards/accuracies": 1.0, "rewards/chosen": -0.19321280717849731, "rewards/margins": 0.4070965647697449, "rewards/rejected": -0.6003093719482422, "step": 6218 }, { "epoch": 17.026694045174537, "grad_norm": 6.448489189147949, "learning_rate": 1.480821917808219e-07, "log_odds_chosen": 1.9442765712738037, "log_odds_ratio": -0.1980711817741394, "logits/chosen": 1.4401366710662842, "logits/rejected": 1.304018259048462, "logps/chosen": -1.293198585510254, "logps/rejected": -2.966630697250366, "loss": 0.4703, "nll_loss": 0.4504615068435669, "rewards/accuracies": 1.0, "rewards/chosen": -0.12931987643241882, "rewards/margins": 0.16734319925308228, "rewards/rejected": -0.2966630458831787, "step": 6219 }, { "epoch": 17.02943189596167, "grad_norm": 4.269944667816162, "learning_rate": 1.4794520547945206e-07, "log_odds_chosen": 3.1552696228027344, "log_odds_ratio": -0.12593655288219452, "logits/chosen": 0.8764374852180481, "logits/rejected": 0.8639129400253296, "logps/chosen": -1.759972333908081, "logps/rejected": -4.478407859802246, "loss": 0.4182, "nll_loss": 0.40562868118286133, "rewards/accuracies": 1.0, "rewards/chosen": -0.17599722743034363, "rewards/margins": 0.2718435525894165, "rewards/rejected": -0.44784078001976013, "step": 6220 }, { "epoch": 17.032169746748803, "grad_norm": 3.2155239582061768, "learning_rate": 1.4780821917808216e-07, "log_odds_chosen": 2.9382567405700684, "log_odds_ratio": -0.16749991476535797, "logits/chosen": 1.0509740114212036, "logits/rejected": 0.9329587817192078, "logps/chosen": -2.0562705993652344, "logps/rejected": -4.787073135375977, "loss": 0.4571, "nll_loss": 0.4403578042984009, "rewards/accuracies": 1.0, "rewards/chosen": -0.20562708377838135, "rewards/margins": 0.2730802595615387, "rewards/rejected": -0.47870731353759766, "step": 6221 }, { "epoch": 17.034907597535934, "grad_norm": 4.1892523765563965, "learning_rate": 1.4767123287671231e-07, "log_odds_chosen": 2.629706382751465, "log_odds_ratio": -0.2256109118461609, "logits/chosen": 0.9578658938407898, "logits/rejected": 0.9176747798919678, "logps/chosen": -1.3212507963180542, "logps/rejected": -3.651233196258545, "loss": 0.4549, "nll_loss": 0.43236058950424194, "rewards/accuracies": 0.875, "rewards/chosen": -0.13212507963180542, "rewards/margins": 0.23299825191497803, "rewards/rejected": -0.36512336134910583, "step": 6222 }, { "epoch": 17.037645448323065, "grad_norm": 3.5586044788360596, "learning_rate": 1.4753424657534247e-07, "log_odds_chosen": 2.652242660522461, "log_odds_ratio": -0.19351841509342194, "logits/chosen": 1.3527766466140747, "logits/rejected": 1.3541889190673828, "logps/chosen": -1.9648704528808594, "logps/rejected": -4.485182285308838, "loss": 0.5001, "nll_loss": 0.4807807207107544, "rewards/accuracies": 1.0, "rewards/chosen": -0.19648706912994385, "rewards/margins": 0.25203120708465576, "rewards/rejected": -0.4485182762145996, "step": 6223 }, { "epoch": 17.0403832991102, "grad_norm": 3.9864885807037354, "learning_rate": 1.473972602739726e-07, "log_odds_chosen": 4.825377464294434, "log_odds_ratio": -0.13148152828216553, "logits/chosen": 1.0849658250808716, "logits/rejected": 1.0453771352767944, "logps/chosen": -2.3157010078430176, "logps/rejected": -6.98160457611084, "loss": 0.6527, "nll_loss": 0.6395546197891235, "rewards/accuracies": 1.0, "rewards/chosen": -0.23157010972499847, "rewards/margins": 0.46659034490585327, "rewards/rejected": -0.6981604695320129, "step": 6224 }, { "epoch": 17.04312114989733, "grad_norm": 3.6648333072662354, "learning_rate": 1.4726027397260274e-07, "log_odds_chosen": 3.430776596069336, "log_odds_ratio": -0.2097417116165161, "logits/chosen": 0.9936902523040771, "logits/rejected": 0.9872358441352844, "logps/chosen": -1.3739022016525269, "logps/rejected": -4.524379730224609, "loss": 0.5285, "nll_loss": 0.5074853897094727, "rewards/accuracies": 1.0, "rewards/chosen": -0.13739021122455597, "rewards/margins": 0.3150478005409241, "rewards/rejected": -0.45243799686431885, "step": 6225 }, { "epoch": 17.045859000684462, "grad_norm": 7.2151408195495605, "learning_rate": 1.4712328767123287e-07, "log_odds_chosen": 3.094703435897827, "log_odds_ratio": -0.4685018062591553, "logits/chosen": 1.2747193574905396, "logits/rejected": 1.3086466789245605, "logps/chosen": -2.1979355812072754, "logps/rejected": -5.164750576019287, "loss": 0.586, "nll_loss": 0.5391038656234741, "rewards/accuracies": 0.625, "rewards/chosen": -0.21979355812072754, "rewards/margins": 0.2966815233230591, "rewards/rejected": -0.5164750814437866, "step": 6226 }, { "epoch": 17.048596851471594, "grad_norm": 4.745730400085449, "learning_rate": 1.4698630136986302e-07, "log_odds_chosen": 1.8248862028121948, "log_odds_ratio": -0.32864075899124146, "logits/chosen": 1.1483359336853027, "logits/rejected": 1.198986530303955, "logps/chosen": -2.528590440750122, "logps/rejected": -4.256250858306885, "loss": 0.665, "nll_loss": 0.6320927143096924, "rewards/accuracies": 0.875, "rewards/chosen": -0.25285905599594116, "rewards/margins": 0.17276602983474731, "rewards/rejected": -0.42562511563301086, "step": 6227 }, { "epoch": 17.05133470225873, "grad_norm": 3.780550956726074, "learning_rate": 1.4684931506849315e-07, "log_odds_chosen": 1.5058417320251465, "log_odds_ratio": -0.31735214591026306, "logits/chosen": 0.9477788209915161, "logits/rejected": 0.8872798681259155, "logps/chosen": -1.9525871276855469, "logps/rejected": -3.3455421924591064, "loss": 0.5743, "nll_loss": 0.542550802230835, "rewards/accuracies": 0.75, "rewards/chosen": -0.1952587217092514, "rewards/margins": 0.13929550349712372, "rewards/rejected": -0.3345542252063751, "step": 6228 }, { "epoch": 17.05407255304586, "grad_norm": 3.5262439250946045, "learning_rate": 1.4671232876712327e-07, "log_odds_chosen": 2.2185981273651123, "log_odds_ratio": -0.19520577788352966, "logits/chosen": 1.2083237171173096, "logits/rejected": 1.1400206089019775, "logps/chosen": -1.9830098152160645, "logps/rejected": -4.049735069274902, "loss": 0.5651, "nll_loss": 0.5455383062362671, "rewards/accuracies": 1.0, "rewards/chosen": -0.19830100238323212, "rewards/margins": 0.2066725194454193, "rewards/rejected": -0.40497350692749023, "step": 6229 }, { "epoch": 17.05681040383299, "grad_norm": 3.8226089477539062, "learning_rate": 1.4657534246575342e-07, "log_odds_chosen": 0.9933249950408936, "log_odds_ratio": -0.3627372682094574, "logits/chosen": 1.2108397483825684, "logits/rejected": 1.190657377243042, "logps/chosen": -1.79037344455719, "logps/rejected": -2.621692657470703, "loss": 0.4104, "nll_loss": 0.3741285502910614, "rewards/accuracies": 0.875, "rewards/chosen": -0.17903734743595123, "rewards/margins": 0.08313190937042236, "rewards/rejected": -0.2621692717075348, "step": 6230 }, { "epoch": 17.059548254620122, "grad_norm": 2.99350643157959, "learning_rate": 1.4643835616438355e-07, "log_odds_chosen": 5.503851890563965, "log_odds_ratio": -0.14600925147533417, "logits/chosen": 1.1313378810882568, "logits/rejected": 1.0895365476608276, "logps/chosen": -1.5982774496078491, "logps/rejected": -6.860124588012695, "loss": 0.5354, "nll_loss": 0.5208165645599365, "rewards/accuracies": 1.0, "rewards/chosen": -0.15982773900032043, "rewards/margins": 0.5261846780776978, "rewards/rejected": -0.6860123872756958, "step": 6231 }, { "epoch": 17.062286105407257, "grad_norm": 3.8767292499542236, "learning_rate": 1.463013698630137e-07, "log_odds_chosen": 1.7665596008300781, "log_odds_ratio": -0.24078677594661713, "logits/chosen": 1.2839702367782593, "logits/rejected": 1.2944532632827759, "logps/chosen": -1.6935055255889893, "logps/rejected": -3.313976526260376, "loss": 0.51, "nll_loss": 0.4859159588813782, "rewards/accuracies": 1.0, "rewards/chosen": -0.16935056447982788, "rewards/margins": 0.16204708814620972, "rewards/rejected": -0.3313976526260376, "step": 6232 }, { "epoch": 17.065023956194388, "grad_norm": 3.9711530208587646, "learning_rate": 1.4616438356164383e-07, "log_odds_chosen": 1.6086444854736328, "log_odds_ratio": -0.21858622133731842, "logits/chosen": 1.1690728664398193, "logits/rejected": 1.0642452239990234, "logps/chosen": -1.4514882564544678, "logps/rejected": -2.860063076019287, "loss": 0.4095, "nll_loss": 0.3876335024833679, "rewards/accuracies": 1.0, "rewards/chosen": -0.14514882862567902, "rewards/margins": 0.1408574879169464, "rewards/rejected": -0.28600630164146423, "step": 6233 }, { "epoch": 17.06776180698152, "grad_norm": 4.623937129974365, "learning_rate": 1.4602739726027395e-07, "log_odds_chosen": 0.5234339237213135, "log_odds_ratio": -0.6238917112350464, "logits/chosen": 1.0916543006896973, "logits/rejected": 1.1480478048324585, "logps/chosen": -2.262176036834717, "logps/rejected": -2.6898269653320312, "loss": 0.5933, "nll_loss": 0.5309509038925171, "rewards/accuracies": 0.625, "rewards/chosen": -0.2262175977230072, "rewards/margins": 0.04276507347822189, "rewards/rejected": -0.2689826786518097, "step": 6234 }, { "epoch": 17.07049965776865, "grad_norm": 3.89724063873291, "learning_rate": 1.458904109589041e-07, "log_odds_chosen": 2.7155609130859375, "log_odds_ratio": -0.19805429875850677, "logits/chosen": 1.1755502223968506, "logits/rejected": 1.241447925567627, "logps/chosen": -2.0033349990844727, "logps/rejected": -4.562069416046143, "loss": 0.6244, "nll_loss": 0.6045675277709961, "rewards/accuracies": 1.0, "rewards/chosen": -0.20033350586891174, "rewards/margins": 0.255873441696167, "rewards/rejected": -0.4562069773674011, "step": 6235 }, { "epoch": 17.073237508555785, "grad_norm": 3.9008548259735107, "learning_rate": 1.4575342465753423e-07, "log_odds_chosen": 2.874330520629883, "log_odds_ratio": -0.15159526467323303, "logits/chosen": 1.0348458290100098, "logits/rejected": 1.0278987884521484, "logps/chosen": -1.3838424682617188, "logps/rejected": -3.9695122241973877, "loss": 0.4294, "nll_loss": 0.4142811596393585, "rewards/accuracies": 1.0, "rewards/chosen": -0.13838425278663635, "rewards/margins": 0.2585669457912445, "rewards/rejected": -0.39695122838020325, "step": 6236 }, { "epoch": 17.075975359342916, "grad_norm": 4.105602264404297, "learning_rate": 1.4561643835616438e-07, "log_odds_chosen": 2.6451451778411865, "log_odds_ratio": -0.23068903386592865, "logits/chosen": 1.2246400117874146, "logits/rejected": 1.2722922563552856, "logps/chosen": -2.1081807613372803, "logps/rejected": -4.641664981842041, "loss": 0.5999, "nll_loss": 0.576846182346344, "rewards/accuracies": 0.875, "rewards/chosen": -0.2108180820941925, "rewards/margins": 0.2533484399318695, "rewards/rejected": -0.464166522026062, "step": 6237 }, { "epoch": 17.078713210130047, "grad_norm": 3.952867031097412, "learning_rate": 1.454794520547945e-07, "log_odds_chosen": 2.3108878135681152, "log_odds_ratio": -0.30405330657958984, "logits/chosen": 0.94560706615448, "logits/rejected": 0.8590131998062134, "logps/chosen": -1.9153573513031006, "logps/rejected": -4.127292633056641, "loss": 0.4758, "nll_loss": 0.4453567862510681, "rewards/accuracies": 0.875, "rewards/chosen": -0.19153572618961334, "rewards/margins": 0.22119355201721191, "rewards/rejected": -0.41272932291030884, "step": 6238 }, { "epoch": 17.08145106091718, "grad_norm": 4.050812721252441, "learning_rate": 1.4534246575342466e-07, "log_odds_chosen": 2.8125176429748535, "log_odds_ratio": -0.17120158672332764, "logits/chosen": 0.9673811197280884, "logits/rejected": 0.9201000928878784, "logps/chosen": -2.1489615440368652, "logps/rejected": -4.805446147918701, "loss": 0.4674, "nll_loss": 0.4502766728401184, "rewards/accuracies": 1.0, "rewards/chosen": -0.21489614248275757, "rewards/margins": 0.2656484544277191, "rewards/rejected": -0.4805446267127991, "step": 6239 }, { "epoch": 17.084188911704313, "grad_norm": 4.083644866943359, "learning_rate": 1.4520547945205479e-07, "log_odds_chosen": 2.3571925163269043, "log_odds_ratio": -0.25954964756965637, "logits/chosen": 1.0302298069000244, "logits/rejected": 1.0569101572036743, "logps/chosen": -1.8729796409606934, "logps/rejected": -4.119513988494873, "loss": 0.5898, "nll_loss": 0.5638374090194702, "rewards/accuracies": 1.0, "rewards/chosen": -0.187297984957695, "rewards/margins": 0.2246534377336502, "rewards/rejected": -0.4119514226913452, "step": 6240 }, { "epoch": 17.086926762491444, "grad_norm": 11.530598640441895, "learning_rate": 1.450684931506849e-07, "log_odds_chosen": 3.736963987350464, "log_odds_ratio": -0.30615806579589844, "logits/chosen": 1.2872834205627441, "logits/rejected": 1.2750015258789062, "logps/chosen": -2.0514540672302246, "logps/rejected": -5.506165504455566, "loss": 0.7221, "nll_loss": 0.6915194988250732, "rewards/accuracies": 0.875, "rewards/chosen": -0.20514538884162903, "rewards/margins": 0.3454711139202118, "rewards/rejected": -0.5506165027618408, "step": 6241 }, { "epoch": 17.089664613278575, "grad_norm": 8.39111328125, "learning_rate": 1.4493150684931506e-07, "log_odds_chosen": 2.6309869289398193, "log_odds_ratio": -0.40552520751953125, "logits/chosen": 1.2266355752944946, "logits/rejected": 1.1588332653045654, "logps/chosen": -2.349886655807495, "logps/rejected": -4.721731185913086, "loss": 0.4626, "nll_loss": 0.4220447540283203, "rewards/accuracies": 0.875, "rewards/chosen": -0.23498865962028503, "rewards/margins": 0.23718449473381042, "rewards/rejected": -0.47217315435409546, "step": 6242 }, { "epoch": 17.09240246406571, "grad_norm": 4.5115861892700195, "learning_rate": 1.447945205479452e-07, "log_odds_chosen": 1.8659307956695557, "log_odds_ratio": -0.2448587715625763, "logits/chosen": 1.0455363988876343, "logits/rejected": 1.0508742332458496, "logps/chosen": -1.71099853515625, "logps/rejected": -3.4077510833740234, "loss": 0.5329, "nll_loss": 0.5084039568901062, "rewards/accuracies": 1.0, "rewards/chosen": -0.17109984159469604, "rewards/margins": 0.16967526078224182, "rewards/rejected": -0.34077513217926025, "step": 6243 }, { "epoch": 17.09514031485284, "grad_norm": 4.244689464569092, "learning_rate": 1.4465753424657534e-07, "log_odds_chosen": 3.1446170806884766, "log_odds_ratio": -0.26749932765960693, "logits/chosen": 1.0797337293624878, "logits/rejected": 1.0992275476455688, "logps/chosen": -1.7611581087112427, "logps/rejected": -4.711042881011963, "loss": 0.5902, "nll_loss": 0.5634499192237854, "rewards/accuracies": 1.0, "rewards/chosen": -0.17611581087112427, "rewards/margins": 0.2949884533882141, "rewards/rejected": -0.47110429406166077, "step": 6244 }, { "epoch": 17.097878165639973, "grad_norm": 4.50295352935791, "learning_rate": 1.4452054794520547e-07, "log_odds_chosen": 1.9695645570755005, "log_odds_ratio": -0.3254047930240631, "logits/chosen": 1.3013560771942139, "logits/rejected": 1.179925799369812, "logps/chosen": -1.4999384880065918, "logps/rejected": -3.2769336700439453, "loss": 0.4234, "nll_loss": 0.3909059166908264, "rewards/accuracies": 0.875, "rewards/chosen": -0.14999385178089142, "rewards/margins": 0.1776995062828064, "rewards/rejected": -0.327693372964859, "step": 6245 }, { "epoch": 17.100616016427104, "grad_norm": 3.698636293411255, "learning_rate": 1.4438356164383562e-07, "log_odds_chosen": 3.620506525039673, "log_odds_ratio": -0.21131230890750885, "logits/chosen": 0.966153621673584, "logits/rejected": 0.9625464677810669, "logps/chosen": -2.0199193954467773, "logps/rejected": -5.515445709228516, "loss": 0.6192, "nll_loss": 0.5980215668678284, "rewards/accuracies": 1.0, "rewards/chosen": -0.2019919455051422, "rewards/margins": 0.34955260157585144, "rewards/rejected": -0.5515445470809937, "step": 6246 }, { "epoch": 17.10335386721424, "grad_norm": 3.26891827583313, "learning_rate": 1.4424657534246577e-07, "log_odds_chosen": 2.5794639587402344, "log_odds_ratio": -0.2528378367424011, "logits/chosen": 1.2173327207565308, "logits/rejected": 1.2394095659255981, "logps/chosen": -1.5844848155975342, "logps/rejected": -3.8307130336761475, "loss": 0.4442, "nll_loss": 0.41891223192214966, "rewards/accuracies": 1.0, "rewards/chosen": -0.1584484577178955, "rewards/margins": 0.22462281584739685, "rewards/rejected": -0.38307130336761475, "step": 6247 }, { "epoch": 17.10609171800137, "grad_norm": 4.092590808868408, "learning_rate": 1.4410958904109587e-07, "log_odds_chosen": 2.395460605621338, "log_odds_ratio": -0.22086788713932037, "logits/chosen": 0.8255987167358398, "logits/rejected": 0.7975839972496033, "logps/chosen": -2.1098737716674805, "logps/rejected": -4.349506855010986, "loss": 0.5103, "nll_loss": 0.48822736740112305, "rewards/accuracies": 1.0, "rewards/chosen": -0.2109873741865158, "rewards/margins": 0.22396332025527954, "rewards/rejected": -0.43495070934295654, "step": 6248 }, { "epoch": 17.1088295687885, "grad_norm": 3.587552070617676, "learning_rate": 1.4397260273972602e-07, "log_odds_chosen": 2.1643621921539307, "log_odds_ratio": -0.29639142751693726, "logits/chosen": 1.0484849214553833, "logits/rejected": 1.0164158344268799, "logps/chosen": -2.0165278911590576, "logps/rejected": -4.1006999015808105, "loss": 0.5786, "nll_loss": 0.5489787459373474, "rewards/accuracies": 0.875, "rewards/chosen": -0.20165279507637024, "rewards/margins": 0.20841720700263977, "rewards/rejected": -0.41007000207901, "step": 6249 }, { "epoch": 17.111567419575632, "grad_norm": 3.221623182296753, "learning_rate": 1.4383561643835615e-07, "log_odds_chosen": 2.9253289699554443, "log_odds_ratio": -0.19709999859333038, "logits/chosen": 1.3292789459228516, "logits/rejected": 1.248511552810669, "logps/chosen": -1.307867169380188, "logps/rejected": -4.005467414855957, "loss": 0.4766, "nll_loss": 0.4569181799888611, "rewards/accuracies": 1.0, "rewards/chosen": -0.1307867169380188, "rewards/margins": 0.26976001262664795, "rewards/rejected": -0.40054672956466675, "step": 6250 }, { "epoch": 17.114305270362767, "grad_norm": 3.871368646621704, "learning_rate": 1.436986301369863e-07, "log_odds_chosen": 1.4698269367218018, "log_odds_ratio": -0.2993773818016052, "logits/chosen": 0.8729636073112488, "logits/rejected": 0.8581861853599548, "logps/chosen": -1.846195936203003, "logps/rejected": -3.1497199535369873, "loss": 0.4762, "nll_loss": 0.4462437033653259, "rewards/accuracies": 1.0, "rewards/chosen": -0.18461959064006805, "rewards/margins": 0.13035240769386292, "rewards/rejected": -0.3149719834327698, "step": 6251 }, { "epoch": 17.117043121149898, "grad_norm": 3.89928936958313, "learning_rate": 1.4356164383561643e-07, "log_odds_chosen": 2.7763898372650146, "log_odds_ratio": -0.13062416017055511, "logits/chosen": 1.0283112525939941, "logits/rejected": 0.9942043423652649, "logps/chosen": -2.0840249061584473, "logps/rejected": -4.657832622528076, "loss": 0.4946, "nll_loss": 0.48155587911605835, "rewards/accuracies": 1.0, "rewards/chosen": -0.20840249955654144, "rewards/margins": 0.25738078355789185, "rewards/rejected": -0.4657832682132721, "step": 6252 }, { "epoch": 17.11978097193703, "grad_norm": 17.013925552368164, "learning_rate": 1.4342465753424658e-07, "log_odds_chosen": 1.2784266471862793, "log_odds_ratio": -0.7809683084487915, "logits/chosen": 1.317613959312439, "logits/rejected": 1.2924724817276, "logps/chosen": -2.6608099937438965, "logps/rejected": -3.7535576820373535, "loss": 0.5921, "nll_loss": 0.5139647126197815, "rewards/accuracies": 0.75, "rewards/chosen": -0.2660810053348541, "rewards/margins": 0.10927474498748779, "rewards/rejected": -0.37535572052001953, "step": 6253 }, { "epoch": 17.12251882272416, "grad_norm": 3.4903476238250732, "learning_rate": 1.432876712328767e-07, "log_odds_chosen": 2.8987021446228027, "log_odds_ratio": -0.245645210146904, "logits/chosen": 1.1743221282958984, "logits/rejected": 1.1812129020690918, "logps/chosen": -1.8795835971832275, "logps/rejected": -4.645052433013916, "loss": 0.4957, "nll_loss": 0.47112834453582764, "rewards/accuracies": 1.0, "rewards/chosen": -0.18795835971832275, "rewards/margins": 0.2765469253063202, "rewards/rejected": -0.46450525522232056, "step": 6254 }, { "epoch": 17.125256673511295, "grad_norm": 3.7009921073913574, "learning_rate": 1.4315068493150683e-07, "log_odds_chosen": 1.782858967781067, "log_odds_ratio": -0.250871479511261, "logits/chosen": 0.9717283248901367, "logits/rejected": 0.9138885140419006, "logps/chosen": -1.668326735496521, "logps/rejected": -3.2871322631835938, "loss": 0.5352, "nll_loss": 0.5101295709609985, "rewards/accuracies": 0.875, "rewards/chosen": -0.16683268547058105, "rewards/margins": 0.16188055276870728, "rewards/rejected": -0.3287132680416107, "step": 6255 }, { "epoch": 17.127994524298426, "grad_norm": 3.7537453174591064, "learning_rate": 1.4301369863013698e-07, "log_odds_chosen": 2.9442601203918457, "log_odds_ratio": -0.22228272259235382, "logits/chosen": 0.7527506947517395, "logits/rejected": 0.6885663270950317, "logps/chosen": -1.482372522354126, "logps/rejected": -4.214900970458984, "loss": 0.532, "nll_loss": 0.509736180305481, "rewards/accuracies": 0.875, "rewards/chosen": -0.14823725819587708, "rewards/margins": 0.2732528746128082, "rewards/rejected": -0.4214901328086853, "step": 6256 }, { "epoch": 17.130732375085557, "grad_norm": 4.073447227478027, "learning_rate": 1.428767123287671e-07, "log_odds_chosen": 1.6892931461334229, "log_odds_ratio": -0.2524144649505615, "logits/chosen": 1.2719277143478394, "logits/rejected": 1.2282171249389648, "logps/chosen": -1.3843934535980225, "logps/rejected": -2.8213140964508057, "loss": 0.433, "nll_loss": 0.4077550172805786, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384393572807312, "rewards/margins": 0.1436920315027237, "rewards/rejected": -0.2821314036846161, "step": 6257 }, { "epoch": 17.13347022587269, "grad_norm": 4.195107936859131, "learning_rate": 1.4273972602739726e-07, "log_odds_chosen": 2.6158347129821777, "log_odds_ratio": -0.22858861088752747, "logits/chosen": 1.0362123250961304, "logits/rejected": 1.117714524269104, "logps/chosen": -2.736522674560547, "logps/rejected": -5.190049171447754, "loss": 0.5956, "nll_loss": 0.5727227926254272, "rewards/accuracies": 0.875, "rewards/chosen": -0.2736522853374481, "rewards/margins": 0.24535267055034637, "rewards/rejected": -0.5190049409866333, "step": 6258 }, { "epoch": 17.136208076659823, "grad_norm": 3.6938564777374268, "learning_rate": 1.426027397260274e-07, "log_odds_chosen": 3.760711669921875, "log_odds_ratio": -0.17789170145988464, "logits/chosen": 1.0979799032211304, "logits/rejected": 1.0718321800231934, "logps/chosen": -1.8644481897354126, "logps/rejected": -5.488680839538574, "loss": 0.4998, "nll_loss": 0.48198631405830383, "rewards/accuracies": 1.0, "rewards/chosen": -0.18644483387470245, "rewards/margins": 0.36242327094078064, "rewards/rejected": -0.5488680601119995, "step": 6259 }, { "epoch": 17.138945927446954, "grad_norm": 5.954524040222168, "learning_rate": 1.4246575342465754e-07, "log_odds_chosen": 2.2583577632904053, "log_odds_ratio": -0.21387772262096405, "logits/chosen": 1.1018929481506348, "logits/rejected": 1.0147521495819092, "logps/chosen": -1.8556733131408691, "logps/rejected": -3.9044108390808105, "loss": 0.5927, "nll_loss": 0.5712811350822449, "rewards/accuracies": 1.0, "rewards/chosen": -0.18556734919548035, "rewards/margins": 0.20487377047538757, "rewards/rejected": -0.3904411196708679, "step": 6260 }, { "epoch": 17.141683778234086, "grad_norm": 3.492093324661255, "learning_rate": 1.4232876712328766e-07, "log_odds_chosen": 1.9041482210159302, "log_odds_ratio": -0.2442903369665146, "logits/chosen": 0.9014316201210022, "logits/rejected": 0.9526511430740356, "logps/chosen": -1.8321279287338257, "logps/rejected": -3.6002769470214844, "loss": 0.4803, "nll_loss": 0.45582395792007446, "rewards/accuracies": 0.875, "rewards/chosen": -0.1832127869129181, "rewards/margins": 0.17681488394737244, "rewards/rejected": -0.3600276708602905, "step": 6261 }, { "epoch": 17.144421629021217, "grad_norm": 3.693934917449951, "learning_rate": 1.421917808219178e-07, "log_odds_chosen": 3.5632073879241943, "log_odds_ratio": -0.11558905243873596, "logits/chosen": 1.0145708322525024, "logits/rejected": 1.063613772392273, "logps/chosen": -2.3749170303344727, "logps/rejected": -5.814844131469727, "loss": 0.6714, "nll_loss": 0.6598300933837891, "rewards/accuracies": 1.0, "rewards/chosen": -0.2374916970729828, "rewards/margins": 0.34399276971817017, "rewards/rejected": -0.5814844369888306, "step": 6262 }, { "epoch": 17.14715947980835, "grad_norm": 5.434290409088135, "learning_rate": 1.4205479452054794e-07, "log_odds_chosen": 1.971684217453003, "log_odds_ratio": -0.19284455478191376, "logits/chosen": 1.2020444869995117, "logits/rejected": 1.1821765899658203, "logps/chosen": -2.6983959674835205, "logps/rejected": -4.555730819702148, "loss": 0.564, "nll_loss": 0.5447653532028198, "rewards/accuracies": 1.0, "rewards/chosen": -0.2698395848274231, "rewards/margins": 0.18573346734046936, "rewards/rejected": -0.45557308197021484, "step": 6263 }, { "epoch": 17.149897330595483, "grad_norm": 4.076581001281738, "learning_rate": 1.4191780821917807e-07, "log_odds_chosen": 4.818325042724609, "log_odds_ratio": -0.059391580522060394, "logits/chosen": 1.0673205852508545, "logits/rejected": 1.163886547088623, "logps/chosen": -1.9417930841445923, "logps/rejected": -6.516107559204102, "loss": 0.7067, "nll_loss": 0.7007959485054016, "rewards/accuracies": 1.0, "rewards/chosen": -0.19417931139469147, "rewards/margins": 0.457431435585022, "rewards/rejected": -0.6516107320785522, "step": 6264 }, { "epoch": 17.152635181382614, "grad_norm": 3.8384816646575928, "learning_rate": 1.4178082191780822e-07, "log_odds_chosen": 1.9410083293914795, "log_odds_ratio": -0.23485134541988373, "logits/chosen": 1.2663688659667969, "logits/rejected": 1.3131194114685059, "logps/chosen": -2.0202646255493164, "logps/rejected": -3.7959563732147217, "loss": 0.4714, "nll_loss": 0.447876513004303, "rewards/accuracies": 1.0, "rewards/chosen": -0.20202648639678955, "rewards/margins": 0.1775691658258438, "rewards/rejected": -0.37959563732147217, "step": 6265 }, { "epoch": 17.155373032169745, "grad_norm": 3.7391769886016846, "learning_rate": 1.4164383561643837e-07, "log_odds_chosen": 4.684773921966553, "log_odds_ratio": -0.08688147366046906, "logits/chosen": 0.9074628353118896, "logits/rejected": 0.9053845405578613, "logps/chosen": -2.1392030715942383, "logps/rejected": -6.634188652038574, "loss": 0.6856, "nll_loss": 0.6768774390220642, "rewards/accuracies": 1.0, "rewards/chosen": -0.21392029523849487, "rewards/margins": 0.44949859380722046, "rewards/rejected": -0.6634188294410706, "step": 6266 }, { "epoch": 17.15811088295688, "grad_norm": 7.410006523132324, "learning_rate": 1.415068493150685e-07, "log_odds_chosen": 1.1604242324829102, "log_odds_ratio": -0.4700555205345154, "logits/chosen": 1.259544014930725, "logits/rejected": 1.2918776273727417, "logps/chosen": -2.557594060897827, "logps/rejected": -3.6336450576782227, "loss": 0.6857, "nll_loss": 0.6386643648147583, "rewards/accuracies": 0.75, "rewards/chosen": -0.25575941801071167, "rewards/margins": 0.10760507732629776, "rewards/rejected": -0.3633645176887512, "step": 6267 }, { "epoch": 17.16084873374401, "grad_norm": 3.559213876724243, "learning_rate": 1.4136986301369862e-07, "log_odds_chosen": 2.2099664211273193, "log_odds_ratio": -0.2875378727912903, "logits/chosen": 1.0568091869354248, "logits/rejected": 1.0442718267440796, "logps/chosen": -2.4888625144958496, "logps/rejected": -4.551338195800781, "loss": 0.6372, "nll_loss": 0.6084920763969421, "rewards/accuracies": 0.75, "rewards/chosen": -0.24888625741004944, "rewards/margins": 0.20624758303165436, "rewards/rejected": -0.455133855342865, "step": 6268 }, { "epoch": 17.163586584531142, "grad_norm": 4.617233753204346, "learning_rate": 1.4123287671232875e-07, "log_odds_chosen": 3.3684723377227783, "log_odds_ratio": -0.07353726029396057, "logits/chosen": 1.2755651473999023, "logits/rejected": 1.3702561855316162, "logps/chosen": -3.1544060707092285, "logps/rejected": -6.373788833618164, "loss": 0.7165, "nll_loss": 0.7091754674911499, "rewards/accuracies": 1.0, "rewards/chosen": -0.3154405951499939, "rewards/margins": 0.32193827629089355, "rewards/rejected": -0.6373788714408875, "step": 6269 }, { "epoch": 17.166324435318277, "grad_norm": 6.311559200286865, "learning_rate": 1.410958904109589e-07, "log_odds_chosen": 3.875248908996582, "log_odds_ratio": -0.266563355922699, "logits/chosen": 1.2601441144943237, "logits/rejected": 1.2312554121017456, "logps/chosen": -2.1627449989318848, "logps/rejected": -5.889662742614746, "loss": 0.5451, "nll_loss": 0.5184851884841919, "rewards/accuracies": 0.875, "rewards/chosen": -0.21627449989318848, "rewards/margins": 0.372691810131073, "rewards/rejected": -0.5889662504196167, "step": 6270 }, { "epoch": 17.169062286105408, "grad_norm": 4.028478145599365, "learning_rate": 1.4095890410958902e-07, "log_odds_chosen": 4.130073547363281, "log_odds_ratio": -0.0576544925570488, "logits/chosen": 1.4964725971221924, "logits/rejected": 1.537422776222229, "logps/chosen": -1.5996440649032593, "logps/rejected": -5.481868743896484, "loss": 0.4504, "nll_loss": 0.44463276863098145, "rewards/accuracies": 1.0, "rewards/chosen": -0.1599644124507904, "rewards/margins": 0.38822251558303833, "rewards/rejected": -0.5481868982315063, "step": 6271 }, { "epoch": 17.17180013689254, "grad_norm": 3.57062029838562, "learning_rate": 1.4082191780821918e-07, "log_odds_chosen": 4.893435001373291, "log_odds_ratio": -0.058392900973558426, "logits/chosen": 1.132483720779419, "logits/rejected": 1.0723166465759277, "logps/chosen": -1.7745022773742676, "logps/rejected": -6.4559221267700195, "loss": 0.6587, "nll_loss": 0.6528185606002808, "rewards/accuracies": 1.0, "rewards/chosen": -0.1774502396583557, "rewards/margins": 0.468142032623291, "rewards/rejected": -0.6455922722816467, "step": 6272 }, { "epoch": 17.17453798767967, "grad_norm": 3.2825474739074707, "learning_rate": 1.4068493150684933e-07, "log_odds_chosen": 2.732281446456909, "log_odds_ratio": -0.15037482976913452, "logits/chosen": 0.9455810785293579, "logits/rejected": 0.8724620938301086, "logps/chosen": -1.7989054918289185, "logps/rejected": -4.332330226898193, "loss": 0.4907, "nll_loss": 0.4756581783294678, "rewards/accuracies": 1.0, "rewards/chosen": -0.17989055812358856, "rewards/margins": 0.25334247946739197, "rewards/rejected": -0.43323299288749695, "step": 6273 }, { "epoch": 17.177275838466805, "grad_norm": 3.8428492546081543, "learning_rate": 1.4054794520547943e-07, "log_odds_chosen": 2.5733933448791504, "log_odds_ratio": -0.2080039530992508, "logits/chosen": 1.137795090675354, "logits/rejected": 1.0120172500610352, "logps/chosen": -1.6512689590454102, "logps/rejected": -3.8973889350891113, "loss": 0.5223, "nll_loss": 0.5015188455581665, "rewards/accuracies": 1.0, "rewards/chosen": -0.16512690484523773, "rewards/margins": 0.22461199760437012, "rewards/rejected": -0.38973888754844666, "step": 6274 }, { "epoch": 17.180013689253936, "grad_norm": 3.287476062774658, "learning_rate": 1.4041095890410958e-07, "log_odds_chosen": 4.177044868469238, "log_odds_ratio": -0.1412263661623001, "logits/chosen": 1.5636013746261597, "logits/rejected": 1.5439672470092773, "logps/chosen": -2.0993340015411377, "logps/rejected": -6.048760890960693, "loss": 0.4942, "nll_loss": 0.4800344407558441, "rewards/accuracies": 1.0, "rewards/chosen": -0.20993340015411377, "rewards/margins": 0.39494264125823975, "rewards/rejected": -0.6048760414123535, "step": 6275 }, { "epoch": 17.182751540041068, "grad_norm": 3.2146430015563965, "learning_rate": 1.402739726027397e-07, "log_odds_chosen": 2.603996753692627, "log_odds_ratio": -0.17688488960266113, "logits/chosen": 1.06229829788208, "logits/rejected": 1.0181876420974731, "logps/chosen": -1.5619924068450928, "logps/rejected": -3.975517988204956, "loss": 0.5122, "nll_loss": 0.4944692850112915, "rewards/accuracies": 1.0, "rewards/chosen": -0.15619924664497375, "rewards/margins": 0.24135255813598633, "rewards/rejected": -0.3975518047809601, "step": 6276 }, { "epoch": 17.1854893908282, "grad_norm": 4.932396411895752, "learning_rate": 1.4013698630136986e-07, "log_odds_chosen": 2.5534656047821045, "log_odds_ratio": -0.43143874406814575, "logits/chosen": 0.8335599303245544, "logits/rejected": 0.8679790496826172, "logps/chosen": -2.4931957721710205, "logps/rejected": -4.941147327423096, "loss": 0.6196, "nll_loss": 0.5764174461364746, "rewards/accuracies": 0.75, "rewards/chosen": -0.24931958317756653, "rewards/margins": 0.24479517340660095, "rewards/rejected": -0.4941147565841675, "step": 6277 }, { "epoch": 17.188227241615333, "grad_norm": 4.126364231109619, "learning_rate": 1.4e-07, "log_odds_chosen": 3.8285202980041504, "log_odds_ratio": -0.11262321472167969, "logits/chosen": 0.9000652432441711, "logits/rejected": 0.90689617395401, "logps/chosen": -2.4985358715057373, "logps/rejected": -6.2052435874938965, "loss": 0.5604, "nll_loss": 0.549157977104187, "rewards/accuracies": 1.0, "rewards/chosen": -0.24985358119010925, "rewards/margins": 0.37067073583602905, "rewards/rejected": -0.6205243468284607, "step": 6278 }, { "epoch": 17.190965092402465, "grad_norm": 4.737746715545654, "learning_rate": 1.3986301369863014e-07, "log_odds_chosen": 2.621584892272949, "log_odds_ratio": -0.23612594604492188, "logits/chosen": 1.0350676774978638, "logits/rejected": 0.9652670621871948, "logps/chosen": -2.289750099182129, "logps/rejected": -4.717000961303711, "loss": 0.6154, "nll_loss": 0.5917449593544006, "rewards/accuracies": 0.875, "rewards/chosen": -0.22897501289844513, "rewards/margins": 0.24272510409355164, "rewards/rejected": -0.47170013189315796, "step": 6279 }, { "epoch": 17.193702943189596, "grad_norm": 4.749578952789307, "learning_rate": 1.397260273972603e-07, "log_odds_chosen": 3.29781436920166, "log_odds_ratio": -0.11184035986661911, "logits/chosen": 0.981149435043335, "logits/rejected": 0.9609174728393555, "logps/chosen": -2.121737480163574, "logps/rejected": -5.240259170532227, "loss": 0.669, "nll_loss": 0.6577975749969482, "rewards/accuracies": 1.0, "rewards/chosen": -0.21217377483844757, "rewards/margins": 0.31185218691825867, "rewards/rejected": -0.5240259170532227, "step": 6280 }, { "epoch": 17.196440793976727, "grad_norm": 3.7341203689575195, "learning_rate": 1.3958904109589039e-07, "log_odds_chosen": 4.387858867645264, "log_odds_ratio": -0.2192169725894928, "logits/chosen": 1.2978715896606445, "logits/rejected": 1.3026530742645264, "logps/chosen": -1.875842809677124, "logps/rejected": -6.0594162940979, "loss": 0.5422, "nll_loss": 0.5202822089195251, "rewards/accuracies": 1.0, "rewards/chosen": -0.1875842809677124, "rewards/margins": 0.41835740208625793, "rewards/rejected": -0.605941653251648, "step": 6281 }, { "epoch": 17.19917864476386, "grad_norm": 7.891197681427002, "learning_rate": 1.3945205479452054e-07, "log_odds_chosen": 3.0868096351623535, "log_odds_ratio": -0.2801100015640259, "logits/chosen": 1.0778224468231201, "logits/rejected": 1.106852650642395, "logps/chosen": -2.0698776245117188, "logps/rejected": -4.993864059448242, "loss": 0.6539, "nll_loss": 0.6258628964424133, "rewards/accuracies": 0.875, "rewards/chosen": -0.20698776841163635, "rewards/margins": 0.29239869117736816, "rewards/rejected": -0.4993864595890045, "step": 6282 }, { "epoch": 17.201916495550993, "grad_norm": 3.8446099758148193, "learning_rate": 1.3931506849315066e-07, "log_odds_chosen": 2.2103559970855713, "log_odds_ratio": -0.3741365671157837, "logits/chosen": 1.260878324508667, "logits/rejected": 1.3580790758132935, "logps/chosen": -2.373589515686035, "logps/rejected": -4.46255350112915, "loss": 0.5781, "nll_loss": 0.5406423807144165, "rewards/accuracies": 0.875, "rewards/chosen": -0.237358957529068, "rewards/margins": 0.20889639854431152, "rewards/rejected": -0.4462553560733795, "step": 6283 }, { "epoch": 17.204654346338124, "grad_norm": 3.1064348220825195, "learning_rate": 1.3917808219178082e-07, "log_odds_chosen": 5.236023426055908, "log_odds_ratio": -0.09258078038692474, "logits/chosen": 1.0570662021636963, "logits/rejected": 0.9821709394454956, "logps/chosen": -2.021338701248169, "logps/rejected": -7.059022903442383, "loss": 0.5776, "nll_loss": 0.5683066844940186, "rewards/accuracies": 1.0, "rewards/chosen": -0.20213386416435242, "rewards/margins": 0.5037684440612793, "rewards/rejected": -0.7059022784233093, "step": 6284 }, { "epoch": 17.207392197125255, "grad_norm": 3.0212199687957764, "learning_rate": 1.3904109589041097e-07, "log_odds_chosen": 3.9193012714385986, "log_odds_ratio": -0.18344971537590027, "logits/chosen": 0.9349725842475891, "logits/rejected": 0.8986901640892029, "logps/chosen": -1.5858962535858154, "logps/rejected": -5.293346405029297, "loss": 0.5313, "nll_loss": 0.5129457712173462, "rewards/accuracies": 1.0, "rewards/chosen": -0.15858961641788483, "rewards/margins": 0.3707450330257416, "rewards/rejected": -0.5293346643447876, "step": 6285 }, { "epoch": 17.21013004791239, "grad_norm": 3.5563228130340576, "learning_rate": 1.389041095890411e-07, "log_odds_chosen": 3.520127773284912, "log_odds_ratio": -0.2534216642379761, "logits/chosen": 1.140981674194336, "logits/rejected": 1.0544533729553223, "logps/chosen": -1.253662347793579, "logps/rejected": -4.506574630737305, "loss": 0.4744, "nll_loss": 0.44902315735816956, "rewards/accuracies": 0.875, "rewards/chosen": -0.1253662258386612, "rewards/margins": 0.325291246175766, "rewards/rejected": -0.4506574869155884, "step": 6286 }, { "epoch": 17.21286789869952, "grad_norm": 4.463654518127441, "learning_rate": 1.3876712328767125e-07, "log_odds_chosen": 3.790234327316284, "log_odds_ratio": -0.18104258179664612, "logits/chosen": 1.4227303266525269, "logits/rejected": 1.4823344945907593, "logps/chosen": -2.0822086334228516, "logps/rejected": -5.708883285522461, "loss": 0.5764, "nll_loss": 0.5582613348960876, "rewards/accuracies": 1.0, "rewards/chosen": -0.20822085440158844, "rewards/margins": 0.3626675009727478, "rewards/rejected": -0.570888340473175, "step": 6287 }, { "epoch": 17.215605749486652, "grad_norm": 6.480225086212158, "learning_rate": 1.3863013698630135e-07, "log_odds_chosen": 1.9183329343795776, "log_odds_ratio": -0.5117554068565369, "logits/chosen": 1.1382492780685425, "logits/rejected": 1.1385608911514282, "logps/chosen": -1.7514797449111938, "logps/rejected": -3.569390058517456, "loss": 0.6091, "nll_loss": 0.5578751564025879, "rewards/accuracies": 0.875, "rewards/chosen": -0.17514798045158386, "rewards/margins": 0.1817910224199295, "rewards/rejected": -0.35693901777267456, "step": 6288 }, { "epoch": 17.218343600273784, "grad_norm": 12.143096923828125, "learning_rate": 1.384931506849315e-07, "log_odds_chosen": 1.6691114902496338, "log_odds_ratio": -0.46237772703170776, "logits/chosen": 0.9586167931556702, "logits/rejected": 0.9330468773841858, "logps/chosen": -2.5287864208221436, "logps/rejected": -3.993464946746826, "loss": 0.5133, "nll_loss": 0.4670525789260864, "rewards/accuracies": 0.875, "rewards/chosen": -0.25287866592407227, "rewards/margins": 0.14646784961223602, "rewards/rejected": -0.3993465304374695, "step": 6289 }, { "epoch": 17.22108145106092, "grad_norm": 3.5755510330200195, "learning_rate": 1.3835616438356162e-07, "log_odds_chosen": 3.6468207836151123, "log_odds_ratio": -0.11947675794363022, "logits/chosen": 1.1806037425994873, "logits/rejected": 1.1922005414962769, "logps/chosen": -2.2494261264801025, "logps/rejected": -5.76959228515625, "loss": 0.5525, "nll_loss": 0.5405634045600891, "rewards/accuracies": 1.0, "rewards/chosen": -0.2249426245689392, "rewards/margins": 0.3520165681838989, "rewards/rejected": -0.5769591331481934, "step": 6290 }, { "epoch": 17.22381930184805, "grad_norm": 11.398158073425293, "learning_rate": 1.3821917808219177e-07, "log_odds_chosen": 1.5915552377700806, "log_odds_ratio": -0.4211956262588501, "logits/chosen": 1.1510425806045532, "logits/rejected": 1.1744660139083862, "logps/chosen": -3.114931106567383, "logps/rejected": -4.596989631652832, "loss": 0.587, "nll_loss": 0.5448917746543884, "rewards/accuracies": 0.875, "rewards/chosen": -0.3114931285381317, "rewards/margins": 0.14820587635040283, "rewards/rejected": -0.45969900488853455, "step": 6291 }, { "epoch": 17.22655715263518, "grad_norm": 3.6550962924957275, "learning_rate": 1.3808219178082193e-07, "log_odds_chosen": 3.616025686264038, "log_odds_ratio": -0.19860370457172394, "logits/chosen": 1.3726563453674316, "logits/rejected": 1.391830325126648, "logps/chosen": -2.01558780670166, "logps/rejected": -5.493046283721924, "loss": 0.5399, "nll_loss": 0.520028829574585, "rewards/accuracies": 1.0, "rewards/chosen": -0.20155879855155945, "rewards/margins": 0.3477458357810974, "rewards/rejected": -0.5493046641349792, "step": 6292 }, { "epoch": 17.229295003422312, "grad_norm": 4.280855178833008, "learning_rate": 1.3794520547945205e-07, "log_odds_chosen": 3.249222755432129, "log_odds_ratio": -0.12829682230949402, "logits/chosen": 1.184369683265686, "logits/rejected": 1.1255182027816772, "logps/chosen": -1.9988186359405518, "logps/rejected": -5.056793212890625, "loss": 0.4983, "nll_loss": 0.48544037342071533, "rewards/accuracies": 1.0, "rewards/chosen": -0.19988186657428741, "rewards/margins": 0.3057974576950073, "rewards/rejected": -0.5056793689727783, "step": 6293 }, { "epoch": 17.232032854209447, "grad_norm": 4.1160454750061035, "learning_rate": 1.3780821917808218e-07, "log_odds_chosen": 2.8687400817871094, "log_odds_ratio": -0.19542355835437775, "logits/chosen": 1.1208420991897583, "logits/rejected": 1.08723783493042, "logps/chosen": -1.4265356063842773, "logps/rejected": -3.9568023681640625, "loss": 0.3827, "nll_loss": 0.36320701241493225, "rewards/accuracies": 1.0, "rewards/chosen": -0.14265355467796326, "rewards/margins": 0.25302669405937195, "rewards/rejected": -0.3956802487373352, "step": 6294 }, { "epoch": 17.234770704996578, "grad_norm": 4.071774005889893, "learning_rate": 1.376712328767123e-07, "log_odds_chosen": 1.4811725616455078, "log_odds_ratio": -0.33007168769836426, "logits/chosen": 1.0317238569259644, "logits/rejected": 1.0598069429397583, "logps/chosen": -1.7863274812698364, "logps/rejected": -3.135042667388916, "loss": 0.5394, "nll_loss": 0.5064014792442322, "rewards/accuracies": 0.875, "rewards/chosen": -0.1786327362060547, "rewards/margins": 0.13487154245376587, "rewards/rejected": -0.31350430846214294, "step": 6295 }, { "epoch": 17.23750855578371, "grad_norm": 4.291026592254639, "learning_rate": 1.3753424657534246e-07, "log_odds_chosen": 3.098419666290283, "log_odds_ratio": -0.13027650117874146, "logits/chosen": 0.914405345916748, "logits/rejected": 0.8426899909973145, "logps/chosen": -1.890119194984436, "logps/rejected": -4.729786396026611, "loss": 0.6925, "nll_loss": 0.679481029510498, "rewards/accuracies": 1.0, "rewards/chosen": -0.18901193141937256, "rewards/margins": 0.2839667499065399, "rewards/rejected": -0.4729786813259125, "step": 6296 }, { "epoch": 17.240246406570844, "grad_norm": 3.8162741661071777, "learning_rate": 1.373972602739726e-07, "log_odds_chosen": 2.6002302169799805, "log_odds_ratio": -0.3187066316604614, "logits/chosen": 1.1766667366027832, "logits/rejected": 1.2108681201934814, "logps/chosen": -2.282832384109497, "logps/rejected": -4.782607078552246, "loss": 0.6632, "nll_loss": 0.6313022375106812, "rewards/accuracies": 0.875, "rewards/chosen": -0.22828325629234314, "rewards/margins": 0.24997740983963013, "rewards/rejected": -0.4782606363296509, "step": 6297 }, { "epoch": 17.242984257357975, "grad_norm": 3.619194507598877, "learning_rate": 1.3726027397260273e-07, "log_odds_chosen": 3.8076415061950684, "log_odds_ratio": -0.20075100660324097, "logits/chosen": 0.9377145767211914, "logits/rejected": 0.9289175271987915, "logps/chosen": -1.7796432971954346, "logps/rejected": -5.409358501434326, "loss": 0.5612, "nll_loss": 0.5410853028297424, "rewards/accuracies": 1.0, "rewards/chosen": -0.17796434462070465, "rewards/margins": 0.3629715144634247, "rewards/rejected": -0.5409358739852905, "step": 6298 }, { "epoch": 17.245722108145106, "grad_norm": 5.517128944396973, "learning_rate": 1.3712328767123289e-07, "log_odds_chosen": 2.0603625774383545, "log_odds_ratio": -0.4494668245315552, "logits/chosen": 1.508704423904419, "logits/rejected": 1.4555158615112305, "logps/chosen": -1.6937980651855469, "logps/rejected": -3.4946160316467285, "loss": 0.5355, "nll_loss": 0.49051958322525024, "rewards/accuracies": 0.875, "rewards/chosen": -0.1693798005580902, "rewards/margins": 0.1800818145275116, "rewards/rejected": -0.3494616150856018, "step": 6299 }, { "epoch": 17.248459958932237, "grad_norm": 4.171250820159912, "learning_rate": 1.36986301369863e-07, "log_odds_chosen": 1.5523837804794312, "log_odds_ratio": -0.2668434679508209, "logits/chosen": 1.0929722785949707, "logits/rejected": 0.9559670090675354, "logps/chosen": -1.415779948234558, "logps/rejected": -2.7794461250305176, "loss": 0.4777, "nll_loss": 0.4509755074977875, "rewards/accuracies": 1.0, "rewards/chosen": -0.14157798886299133, "rewards/margins": 0.13636662065982819, "rewards/rejected": -0.2779445946216583, "step": 6300 }, { "epoch": 17.251197809719372, "grad_norm": 3.2779831886291504, "learning_rate": 1.3684931506849314e-07, "log_odds_chosen": 2.1920242309570312, "log_odds_ratio": -0.292291522026062, "logits/chosen": 0.9162673354148865, "logits/rejected": 0.9133694171905518, "logps/chosen": -1.8039121627807617, "logps/rejected": -3.839263916015625, "loss": 0.531, "nll_loss": 0.5017825961112976, "rewards/accuracies": 0.875, "rewards/chosen": -0.18039120733737946, "rewards/margins": 0.20353516936302185, "rewards/rejected": -0.3839263916015625, "step": 6301 }, { "epoch": 17.253935660506503, "grad_norm": 3.1781818866729736, "learning_rate": 1.3671232876712326e-07, "log_odds_chosen": 3.500751256942749, "log_odds_ratio": -0.1386527568101883, "logits/chosen": 1.2789252996444702, "logits/rejected": 1.2318881750106812, "logps/chosen": -1.6284403800964355, "logps/rejected": -4.893500328063965, "loss": 0.4758, "nll_loss": 0.46189671754837036, "rewards/accuracies": 1.0, "rewards/chosen": -0.16284403204917908, "rewards/margins": 0.32650598883628845, "rewards/rejected": -0.48935002088546753, "step": 6302 }, { "epoch": 17.256673511293634, "grad_norm": 7.335266590118408, "learning_rate": 1.3657534246575341e-07, "log_odds_chosen": 5.352986812591553, "log_odds_ratio": -0.11020370572805405, "logits/chosen": 1.3494294881820679, "logits/rejected": 1.2934515476226807, "logps/chosen": -2.4104716777801514, "logps/rejected": -7.611464500427246, "loss": 0.6588, "nll_loss": 0.6477912664413452, "rewards/accuracies": 1.0, "rewards/chosen": -0.24104717373847961, "rewards/margins": 0.5200993418693542, "rewards/rejected": -0.7611465454101562, "step": 6303 }, { "epoch": 17.259411362080765, "grad_norm": 4.192700386047363, "learning_rate": 1.3643835616438357e-07, "log_odds_chosen": 1.6310511827468872, "log_odds_ratio": -0.32193389534950256, "logits/chosen": 1.2197091579437256, "logits/rejected": 1.1277917623519897, "logps/chosen": -1.4621164798736572, "logps/rejected": -2.842397451400757, "loss": 0.4022, "nll_loss": 0.370036780834198, "rewards/accuracies": 0.875, "rewards/chosen": -0.1462116539478302, "rewards/margins": 0.1380281001329422, "rewards/rejected": -0.2842397391796112, "step": 6304 }, { "epoch": 17.2621492128679, "grad_norm": 6.2368245124816895, "learning_rate": 1.363013698630137e-07, "log_odds_chosen": 1.6906579732894897, "log_odds_ratio": -0.4288591742515564, "logits/chosen": 1.1730010509490967, "logits/rejected": 1.0945649147033691, "logps/chosen": -2.7454683780670166, "logps/rejected": -4.345778465270996, "loss": 0.6924, "nll_loss": 0.6494734287261963, "rewards/accuracies": 0.75, "rewards/chosen": -0.27454686164855957, "rewards/margins": 0.1600310504436493, "rewards/rejected": -0.4345778822898865, "step": 6305 }, { "epoch": 17.26488706365503, "grad_norm": 3.477926015853882, "learning_rate": 1.3616438356164384e-07, "log_odds_chosen": 4.174396514892578, "log_odds_ratio": -0.2164255678653717, "logits/chosen": 1.3125299215316772, "logits/rejected": 1.375898838043213, "logps/chosen": -2.083289384841919, "logps/rejected": -6.03408670425415, "loss": 0.5916, "nll_loss": 0.5699713826179504, "rewards/accuracies": 0.875, "rewards/chosen": -0.2083289474248886, "rewards/margins": 0.39507973194122314, "rewards/rejected": -0.603408694267273, "step": 6306 }, { "epoch": 17.267624914442163, "grad_norm": 4.085737705230713, "learning_rate": 1.3602739726027397e-07, "log_odds_chosen": 3.7253618240356445, "log_odds_ratio": -0.1916671097278595, "logits/chosen": 1.0686591863632202, "logits/rejected": 1.1969966888427734, "logps/chosen": -1.7465307712554932, "logps/rejected": -5.157177925109863, "loss": 0.5402, "nll_loss": 0.5210427045822144, "rewards/accuracies": 1.0, "rewards/chosen": -0.1746530830860138, "rewards/margins": 0.3410646915435791, "rewards/rejected": -0.5157178044319153, "step": 6307 }, { "epoch": 17.270362765229294, "grad_norm": 3.933819055557251, "learning_rate": 1.358904109589041e-07, "log_odds_chosen": 2.3315227031707764, "log_odds_ratio": -0.21786276996135712, "logits/chosen": 0.8140738606452942, "logits/rejected": 0.6991531848907471, "logps/chosen": -1.2602949142456055, "logps/rejected": -3.346874713897705, "loss": 0.5092, "nll_loss": 0.48741376399993896, "rewards/accuracies": 1.0, "rewards/chosen": -0.12602950632572174, "rewards/margins": 0.20865796506404877, "rewards/rejected": -0.3346874713897705, "step": 6308 }, { "epoch": 17.27310061601643, "grad_norm": 4.271871566772461, "learning_rate": 1.3575342465753422e-07, "log_odds_chosen": 2.8508968353271484, "log_odds_ratio": -0.13384732604026794, "logits/chosen": 1.0783469676971436, "logits/rejected": 1.1771657466888428, "logps/chosen": -1.9978259801864624, "logps/rejected": -4.681178092956543, "loss": 0.5513, "nll_loss": 0.537912130355835, "rewards/accuracies": 1.0, "rewards/chosen": -0.1997826099395752, "rewards/margins": 0.268335223197937, "rewards/rejected": -0.4681178331375122, "step": 6309 }, { "epoch": 17.27583846680356, "grad_norm": 3.887040615081787, "learning_rate": 1.3561643835616437e-07, "log_odds_chosen": 3.0724382400512695, "log_odds_ratio": -0.13287582993507385, "logits/chosen": 1.1008808612823486, "logits/rejected": 1.0242547988891602, "logps/chosen": -1.6621816158294678, "logps/rejected": -4.494602203369141, "loss": 0.4275, "nll_loss": 0.4142225980758667, "rewards/accuracies": 1.0, "rewards/chosen": -0.16621816158294678, "rewards/margins": 0.2832421064376831, "rewards/rejected": -0.4494602680206299, "step": 6310 }, { "epoch": 17.27857631759069, "grad_norm": 4.093495845794678, "learning_rate": 1.3547945205479453e-07, "log_odds_chosen": 1.7148852348327637, "log_odds_ratio": -0.27703091502189636, "logits/chosen": 0.9288702607154846, "logits/rejected": 0.8685428500175476, "logps/chosen": -1.8292232751846313, "logps/rejected": -3.4155631065368652, "loss": 0.5157, "nll_loss": 0.48795121908187866, "rewards/accuracies": 0.875, "rewards/chosen": -0.18292230367660522, "rewards/margins": 0.1586339771747589, "rewards/rejected": -0.3415563106536865, "step": 6311 }, { "epoch": 17.281314168377822, "grad_norm": 4.790345191955566, "learning_rate": 1.3534246575342465e-07, "log_odds_chosen": 1.6561787128448486, "log_odds_ratio": -0.24249958992004395, "logits/chosen": 0.9993129372596741, "logits/rejected": 0.9442305564880371, "logps/chosen": -1.493321180343628, "logps/rejected": -2.9190824031829834, "loss": 0.4626, "nll_loss": 0.43837136030197144, "rewards/accuracies": 1.0, "rewards/chosen": -0.14933213591575623, "rewards/margins": 0.14257609844207764, "rewards/rejected": -0.29190823435783386, "step": 6312 }, { "epoch": 17.284052019164957, "grad_norm": 5.587111473083496, "learning_rate": 1.352054794520548e-07, "log_odds_chosen": 2.617234706878662, "log_odds_ratio": -0.23645204305648804, "logits/chosen": 1.0635290145874023, "logits/rejected": 0.9686371684074402, "logps/chosen": -2.3548834323883057, "logps/rejected": -4.772653579711914, "loss": 0.5493, "nll_loss": 0.5256171226501465, "rewards/accuracies": 0.875, "rewards/chosen": -0.23548835515975952, "rewards/margins": 0.24177700281143188, "rewards/rejected": -0.4772653877735138, "step": 6313 }, { "epoch": 17.286789869952088, "grad_norm": 3.8662726879119873, "learning_rate": 1.350684931506849e-07, "log_odds_chosen": 1.9314988851547241, "log_odds_ratio": -0.2433917224407196, "logits/chosen": 1.0171799659729004, "logits/rejected": 0.9914888143539429, "logps/chosen": -1.297550916671753, "logps/rejected": -3.0030417442321777, "loss": 0.3844, "nll_loss": 0.3600199818611145, "rewards/accuracies": 1.0, "rewards/chosen": -0.12975509464740753, "rewards/margins": 0.17054909467697144, "rewards/rejected": -0.30030420422554016, "step": 6314 }, { "epoch": 17.28952772073922, "grad_norm": 3.282116413116455, "learning_rate": 1.3493150684931505e-07, "log_odds_chosen": 1.7702640295028687, "log_odds_ratio": -0.20817901194095612, "logits/chosen": 0.9769268035888672, "logits/rejected": 0.9646497368812561, "logps/chosen": -2.1342477798461914, "logps/rejected": -3.738955020904541, "loss": 0.5752, "nll_loss": 0.5544281005859375, "rewards/accuracies": 1.0, "rewards/chosen": -0.21342477202415466, "rewards/margins": 0.16047073900699615, "rewards/rejected": -0.3738954961299896, "step": 6315 }, { "epoch": 17.29226557152635, "grad_norm": 3.9996819496154785, "learning_rate": 1.347945205479452e-07, "log_odds_chosen": 1.8925920724868774, "log_odds_ratio": -0.22263196110725403, "logits/chosen": 1.0430543422698975, "logits/rejected": 1.1022498607635498, "logps/chosen": -1.9029176235198975, "logps/rejected": -3.641402244567871, "loss": 0.5043, "nll_loss": 0.48200395703315735, "rewards/accuracies": 1.0, "rewards/chosen": -0.19029176235198975, "rewards/margins": 0.1738484501838684, "rewards/rejected": -0.36414024233818054, "step": 6316 }, { "epoch": 17.295003422313485, "grad_norm": 3.9427618980407715, "learning_rate": 1.3465753424657533e-07, "log_odds_chosen": 3.1472604274749756, "log_odds_ratio": -0.15652146935462952, "logits/chosen": 1.1607284545898438, "logits/rejected": 1.2546579837799072, "logps/chosen": -1.8145952224731445, "logps/rejected": -4.766237258911133, "loss": 0.5635, "nll_loss": 0.5478099584579468, "rewards/accuracies": 1.0, "rewards/chosen": -0.18145951628684998, "rewards/margins": 0.29516419768333435, "rewards/rejected": -0.4766237735748291, "step": 6317 }, { "epoch": 17.297741273100616, "grad_norm": 5.200570583343506, "learning_rate": 1.3452054794520548e-07, "log_odds_chosen": 1.422774314880371, "log_odds_ratio": -0.578026533126831, "logits/chosen": 0.8601105213165283, "logits/rejected": 0.9898699522018433, "logps/chosen": -3.4902093410491943, "logps/rejected": -4.819194793701172, "loss": 0.6547, "nll_loss": 0.5968946218490601, "rewards/accuracies": 0.625, "rewards/chosen": -0.34902095794677734, "rewards/margins": 0.13289856910705566, "rewards/rejected": -0.4819194972515106, "step": 6318 }, { "epoch": 17.300479123887747, "grad_norm": 3.862945556640625, "learning_rate": 1.343835616438356e-07, "log_odds_chosen": 4.769772529602051, "log_odds_ratio": -0.10490696132183075, "logits/chosen": 1.0314217805862427, "logits/rejected": 0.9963982105255127, "logps/chosen": -2.2204012870788574, "logps/rejected": -6.751579761505127, "loss": 0.4925, "nll_loss": 0.4820479154586792, "rewards/accuracies": 1.0, "rewards/chosen": -0.22204014658927917, "rewards/margins": 0.45311781764030457, "rewards/rejected": -0.675157904624939, "step": 6319 }, { "epoch": 17.30321697467488, "grad_norm": 4.125380516052246, "learning_rate": 1.3424657534246576e-07, "log_odds_chosen": 3.128753662109375, "log_odds_ratio": -0.22761431336402893, "logits/chosen": 1.0787761211395264, "logits/rejected": 0.9539399147033691, "logps/chosen": -1.4033780097961426, "logps/rejected": -4.329251289367676, "loss": 0.5074, "nll_loss": 0.48463115096092224, "rewards/accuracies": 1.0, "rewards/chosen": -0.14033780992031097, "rewards/margins": 0.2925872802734375, "rewards/rejected": -0.43292510509490967, "step": 6320 }, { "epoch": 17.305954825462013, "grad_norm": 3.635641098022461, "learning_rate": 1.3410958904109586e-07, "log_odds_chosen": 2.948566198348999, "log_odds_ratio": -0.19836284220218658, "logits/chosen": 1.0681698322296143, "logits/rejected": 1.1364812850952148, "logps/chosen": -2.2264881134033203, "logps/rejected": -5.078417778015137, "loss": 0.545, "nll_loss": 0.5251554846763611, "rewards/accuracies": 1.0, "rewards/chosen": -0.22264879941940308, "rewards/margins": 0.28519296646118164, "rewards/rejected": -0.5078417658805847, "step": 6321 }, { "epoch": 17.308692676249144, "grad_norm": 6.3247199058532715, "learning_rate": 1.33972602739726e-07, "log_odds_chosen": 2.2291476726531982, "log_odds_ratio": -0.46465539932250977, "logits/chosen": 1.413776159286499, "logits/rejected": 1.457735538482666, "logps/chosen": -2.2114131450653076, "logps/rejected": -4.2616167068481445, "loss": 0.5339, "nll_loss": 0.48743438720703125, "rewards/accuracies": 0.75, "rewards/chosen": -0.22114130854606628, "rewards/margins": 0.20502036809921265, "rewards/rejected": -0.42616164684295654, "step": 6322 }, { "epoch": 17.311430527036276, "grad_norm": 5.828843116760254, "learning_rate": 1.3383561643835616e-07, "log_odds_chosen": 1.3847830295562744, "log_odds_ratio": -0.34842050075531006, "logits/chosen": 0.9659088253974915, "logits/rejected": 0.9928390979766846, "logps/chosen": -2.408097982406616, "logps/rejected": -3.6347568035125732, "loss": 0.5905, "nll_loss": 0.555618941783905, "rewards/accuracies": 1.0, "rewards/chosen": -0.24080978333950043, "rewards/margins": 0.1226658970117569, "rewards/rejected": -0.3634756803512573, "step": 6323 }, { "epoch": 17.31416837782341, "grad_norm": 4.671709060668945, "learning_rate": 1.336986301369863e-07, "log_odds_chosen": 2.850428819656372, "log_odds_ratio": -0.1540345400571823, "logits/chosen": 1.039674162864685, "logits/rejected": 1.054291009902954, "logps/chosen": -1.8194799423217773, "logps/rejected": -4.421877861022949, "loss": 0.4439, "nll_loss": 0.4285343289375305, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819480061531067, "rewards/margins": 0.26023977994918823, "rewards/rejected": -0.4421877861022949, "step": 6324 }, { "epoch": 17.31690622861054, "grad_norm": 4.299850940704346, "learning_rate": 1.3356164383561644e-07, "log_odds_chosen": 5.493544101715088, "log_odds_ratio": -0.0283275805413723, "logits/chosen": 1.3357166051864624, "logits/rejected": 1.4089983701705933, "logps/chosen": -2.0589940547943115, "logps/rejected": -7.328393936157227, "loss": 0.6674, "nll_loss": 0.6646162271499634, "rewards/accuracies": 1.0, "rewards/chosen": -0.20589938759803772, "rewards/margins": 0.5269399881362915, "rewards/rejected": -0.7328393459320068, "step": 6325 }, { "epoch": 17.319644079397673, "grad_norm": 3.2472617626190186, "learning_rate": 1.3342465753424657e-07, "log_odds_chosen": 2.907219886779785, "log_odds_ratio": -0.15374314785003662, "logits/chosen": 1.1534560918807983, "logits/rejected": 1.1321574449539185, "logps/chosen": -1.7274909019470215, "logps/rejected": -4.462062835693359, "loss": 0.5579, "nll_loss": 0.5425731539726257, "rewards/accuracies": 1.0, "rewards/chosen": -0.1727491021156311, "rewards/margins": 0.27345719933509827, "rewards/rejected": -0.44620630145072937, "step": 6326 }, { "epoch": 17.322381930184804, "grad_norm": 6.41959285736084, "learning_rate": 1.3328767123287672e-07, "log_odds_chosen": 3.463911771774292, "log_odds_ratio": -0.24570441246032715, "logits/chosen": 1.1645232439041138, "logits/rejected": 1.158926248550415, "logps/chosen": -1.9691386222839355, "logps/rejected": -5.1688432693481445, "loss": 0.7083, "nll_loss": 0.6836814284324646, "rewards/accuracies": 0.875, "rewards/chosen": -0.19691386818885803, "rewards/margins": 0.31997042894363403, "rewards/rejected": -0.5168842673301697, "step": 6327 }, { "epoch": 17.32511978097194, "grad_norm": 3.826627016067505, "learning_rate": 1.3315068493150685e-07, "log_odds_chosen": 3.8276467323303223, "log_odds_ratio": -0.16429314017295837, "logits/chosen": 1.1913245916366577, "logits/rejected": 1.1225037574768066, "logps/chosen": -1.5247068405151367, "logps/rejected": -5.107348918914795, "loss": 0.4171, "nll_loss": 0.4006858468055725, "rewards/accuracies": 1.0, "rewards/chosen": -0.1524706780910492, "rewards/margins": 0.3582642078399658, "rewards/rejected": -0.5107349157333374, "step": 6328 }, { "epoch": 17.32785763175907, "grad_norm": 3.4417686462402344, "learning_rate": 1.3301369863013697e-07, "log_odds_chosen": 3.2567031383514404, "log_odds_ratio": -0.1537889540195465, "logits/chosen": 0.9057180285453796, "logits/rejected": 0.8340564370155334, "logps/chosen": -1.6984984874725342, "logps/rejected": -4.645808696746826, "loss": 0.5697, "nll_loss": 0.5543201565742493, "rewards/accuracies": 1.0, "rewards/chosen": -0.16984987258911133, "rewards/margins": 0.2947310209274292, "rewards/rejected": -0.4645808935165405, "step": 6329 }, { "epoch": 17.3305954825462, "grad_norm": 3.828427791595459, "learning_rate": 1.3287671232876712e-07, "log_odds_chosen": 2.1972458362579346, "log_odds_ratio": -0.3074990212917328, "logits/chosen": 1.0795795917510986, "logits/rejected": 1.0183923244476318, "logps/chosen": -1.3895773887634277, "logps/rejected": -3.367065191268921, "loss": 0.4971, "nll_loss": 0.46637028455734253, "rewards/accuracies": 0.875, "rewards/chosen": -0.13895775377750397, "rewards/margins": 0.19774876534938812, "rewards/rejected": -0.3367065191268921, "step": 6330 }, { "epoch": 17.333333333333332, "grad_norm": 5.786331653594971, "learning_rate": 1.3273972602739725e-07, "log_odds_chosen": 2.504282236099243, "log_odds_ratio": -0.39386385679244995, "logits/chosen": 0.9754999876022339, "logits/rejected": 0.9238283634185791, "logps/chosen": -2.8438560962677, "logps/rejected": -5.292928695678711, "loss": 0.6954, "nll_loss": 0.6560433506965637, "rewards/accuracies": 0.875, "rewards/chosen": -0.284385621547699, "rewards/margins": 0.2449072301387787, "rewards/rejected": -0.5292928814888, "step": 6331 }, { "epoch": 17.336071184120467, "grad_norm": 3.5940370559692383, "learning_rate": 1.326027397260274e-07, "log_odds_chosen": 3.1300549507141113, "log_odds_ratio": -0.18743209540843964, "logits/chosen": 1.3528273105621338, "logits/rejected": 1.360384225845337, "logps/chosen": -1.9847657680511475, "logps/rejected": -4.94888973236084, "loss": 0.5481, "nll_loss": 0.5293803811073303, "rewards/accuracies": 0.875, "rewards/chosen": -0.19847658276557922, "rewards/margins": 0.2964124083518982, "rewards/rejected": -0.4948890209197998, "step": 6332 }, { "epoch": 17.338809034907598, "grad_norm": 3.70422101020813, "learning_rate": 1.3246575342465753e-07, "log_odds_chosen": 4.1678595542907715, "log_odds_ratio": -0.06271758675575256, "logits/chosen": 1.226053237915039, "logits/rejected": 1.3101390600204468, "logps/chosen": -1.4586231708526611, "logps/rejected": -5.219661712646484, "loss": 0.4946, "nll_loss": 0.48833099007606506, "rewards/accuracies": 1.0, "rewards/chosen": -0.14586231112480164, "rewards/margins": 0.3761039078235626, "rewards/rejected": -0.5219662189483643, "step": 6333 }, { "epoch": 17.34154688569473, "grad_norm": 3.434380292892456, "learning_rate": 1.3232876712328768e-07, "log_odds_chosen": 2.00974178314209, "log_odds_ratio": -0.16270020604133606, "logits/chosen": 1.0388867855072021, "logits/rejected": 1.0075006484985352, "logps/chosen": -1.7687548398971558, "logps/rejected": -3.5903501510620117, "loss": 0.4811, "nll_loss": 0.46485352516174316, "rewards/accuracies": 1.0, "rewards/chosen": -0.17687548696994781, "rewards/margins": 0.18215951323509216, "rewards/rejected": -0.35903501510620117, "step": 6334 }, { "epoch": 17.34428473648186, "grad_norm": 7.989785194396973, "learning_rate": 1.321917808219178e-07, "log_odds_chosen": 4.056955337524414, "log_odds_ratio": -0.22278064489364624, "logits/chosen": 1.2151548862457275, "logits/rejected": 1.1740696430206299, "logps/chosen": -2.1091017723083496, "logps/rejected": -6.024481296539307, "loss": 0.6631, "nll_loss": 0.6408533453941345, "rewards/accuracies": 0.875, "rewards/chosen": -0.21091017127037048, "rewards/margins": 0.39153796434402466, "rewards/rejected": -0.6024481654167175, "step": 6335 }, { "epoch": 17.347022587268995, "grad_norm": 3.4925458431243896, "learning_rate": 1.3205479452054793e-07, "log_odds_chosen": 3.1019411087036133, "log_odds_ratio": -0.12612700462341309, "logits/chosen": 0.8517624139785767, "logits/rejected": 0.7533043622970581, "logps/chosen": -1.4098408222198486, "logps/rejected": -4.214853286743164, "loss": 0.4323, "nll_loss": 0.4197187125682831, "rewards/accuracies": 1.0, "rewards/chosen": -0.14098408818244934, "rewards/margins": 0.28050124645233154, "rewards/rejected": -0.42148536443710327, "step": 6336 }, { "epoch": 17.349760438056126, "grad_norm": 3.5295515060424805, "learning_rate": 1.3191780821917808e-07, "log_odds_chosen": 4.044488906860352, "log_odds_ratio": -0.16877438127994537, "logits/chosen": 1.0373382568359375, "logits/rejected": 1.0203899145126343, "logps/chosen": -1.5202250480651855, "logps/rejected": -5.273946285247803, "loss": 0.5527, "nll_loss": 0.5358527898788452, "rewards/accuracies": 1.0, "rewards/chosen": -0.15202249586582184, "rewards/margins": 0.37537211179733276, "rewards/rejected": -0.5273946523666382, "step": 6337 }, { "epoch": 17.352498288843258, "grad_norm": 4.4451069831848145, "learning_rate": 1.317808219178082e-07, "log_odds_chosen": 2.3556742668151855, "log_odds_ratio": -0.22570598125457764, "logits/chosen": 1.2606799602508545, "logits/rejected": 1.20550537109375, "logps/chosen": -1.7873411178588867, "logps/rejected": -3.9227795600891113, "loss": 0.4195, "nll_loss": 0.3969530165195465, "rewards/accuracies": 0.875, "rewards/chosen": -0.17873410880565643, "rewards/margins": 0.2135438323020935, "rewards/rejected": -0.39227795600891113, "step": 6338 }, { "epoch": 17.35523613963039, "grad_norm": 4.062728404998779, "learning_rate": 1.3164383561643836e-07, "log_odds_chosen": 1.2943984270095825, "log_odds_ratio": -0.38234463334083557, "logits/chosen": 1.1990925073623657, "logits/rejected": 1.204721450805664, "logps/chosen": -2.2619290351867676, "logps/rejected": -3.4709458351135254, "loss": 0.5332, "nll_loss": 0.4949697256088257, "rewards/accuracies": 1.0, "rewards/chosen": -0.2261929214000702, "rewards/margins": 0.1209016814827919, "rewards/rejected": -0.3470945954322815, "step": 6339 }, { "epoch": 17.357973990417523, "grad_norm": 4.372329235076904, "learning_rate": 1.3150684931506849e-07, "log_odds_chosen": 1.986133098602295, "log_odds_ratio": -0.1950078308582306, "logits/chosen": 1.0068068504333496, "logits/rejected": 0.9211733341217041, "logps/chosen": -1.7742027044296265, "logps/rejected": -3.5277976989746094, "loss": 0.474, "nll_loss": 0.4544638991355896, "rewards/accuracies": 1.0, "rewards/chosen": -0.1774202585220337, "rewards/margins": 0.17535951733589172, "rewards/rejected": -0.3527797758579254, "step": 6340 }, { "epoch": 17.360711841204655, "grad_norm": 4.538900375366211, "learning_rate": 1.313698630136986e-07, "log_odds_chosen": 2.9992856979370117, "log_odds_ratio": -0.2679128646850586, "logits/chosen": 1.0373021364212036, "logits/rejected": 1.0619874000549316, "logps/chosen": -2.528311252593994, "logps/rejected": -5.440272331237793, "loss": 0.6195, "nll_loss": 0.5927282571792603, "rewards/accuracies": 0.875, "rewards/chosen": -0.2528311312198639, "rewards/margins": 0.29119616746902466, "rewards/rejected": -0.5440273284912109, "step": 6341 }, { "epoch": 17.363449691991786, "grad_norm": 3.27204966545105, "learning_rate": 1.3123287671232876e-07, "log_odds_chosen": 4.570202827453613, "log_odds_ratio": -0.15032446384429932, "logits/chosen": 1.2439470291137695, "logits/rejected": 1.2221893072128296, "logps/chosen": -1.5367307662963867, "logps/rejected": -5.774010181427002, "loss": 0.4684, "nll_loss": 0.45339974761009216, "rewards/accuracies": 0.875, "rewards/chosen": -0.15367308259010315, "rewards/margins": 0.42372792959213257, "rewards/rejected": -0.5774009823799133, "step": 6342 }, { "epoch": 17.366187542778917, "grad_norm": 3.7012479305267334, "learning_rate": 1.310958904109589e-07, "log_odds_chosen": 2.811525344848633, "log_odds_ratio": -0.11914139986038208, "logits/chosen": 0.9994629621505737, "logits/rejected": 0.9751849174499512, "logps/chosen": -2.3948631286621094, "logps/rejected": -5.033315658569336, "loss": 0.6842, "nll_loss": 0.6722409725189209, "rewards/accuracies": 1.0, "rewards/chosen": -0.23948630690574646, "rewards/margins": 0.2638452649116516, "rewards/rejected": -0.5033315420150757, "step": 6343 }, { "epoch": 17.36892539356605, "grad_norm": 4.4480881690979, "learning_rate": 1.3095890410958904e-07, "log_odds_chosen": 2.2085089683532715, "log_odds_ratio": -0.2664834260940552, "logits/chosen": 1.0491976737976074, "logits/rejected": 1.066770076751709, "logps/chosen": -1.9399995803833008, "logps/rejected": -3.8659398555755615, "loss": 0.5703, "nll_loss": 0.5436514616012573, "rewards/accuracies": 1.0, "rewards/chosen": -0.19399994611740112, "rewards/margins": 0.19259405136108398, "rewards/rejected": -0.3865939974784851, "step": 6344 }, { "epoch": 17.371663244353183, "grad_norm": 5.225645542144775, "learning_rate": 1.3082191780821917e-07, "log_odds_chosen": 1.0614384412765503, "log_odds_ratio": -0.506091833114624, "logits/chosen": 0.9472131133079529, "logits/rejected": 0.9800013303756714, "logps/chosen": -2.238978862762451, "logps/rejected": -3.2557499408721924, "loss": 0.6308, "nll_loss": 0.5802319049835205, "rewards/accuracies": 0.75, "rewards/chosen": -0.22389787435531616, "rewards/margins": 0.10167711973190308, "rewards/rejected": -0.32557499408721924, "step": 6345 }, { "epoch": 17.374401095140314, "grad_norm": 2.931668519973755, "learning_rate": 1.3068493150684932e-07, "log_odds_chosen": 4.910054683685303, "log_odds_ratio": -0.089351125061512, "logits/chosen": 1.0001376867294312, "logits/rejected": 0.9809277057647705, "logps/chosen": -1.6792473793029785, "logps/rejected": -6.172735214233398, "loss": 0.5256, "nll_loss": 0.516692042350769, "rewards/accuracies": 1.0, "rewards/chosen": -0.16792473196983337, "rewards/margins": 0.4493488073348999, "rewards/rejected": -0.6172735691070557, "step": 6346 }, { "epoch": 17.377138945927445, "grad_norm": 3.6972544193267822, "learning_rate": 1.3054794520547947e-07, "log_odds_chosen": 3.7965810298919678, "log_odds_ratio": -0.1752222180366516, "logits/chosen": 0.9640984535217285, "logits/rejected": 0.9133553504943848, "logps/chosen": -1.5699149370193481, "logps/rejected": -5.16013240814209, "loss": 0.4826, "nll_loss": 0.4650673270225525, "rewards/accuracies": 1.0, "rewards/chosen": -0.15699148178100586, "rewards/margins": 0.35902172327041626, "rewards/rejected": -0.5160132646560669, "step": 6347 }, { "epoch": 17.37987679671458, "grad_norm": 3.8292834758758545, "learning_rate": 1.3041095890410957e-07, "log_odds_chosen": 2.632355213165283, "log_odds_ratio": -0.17327746748924255, "logits/chosen": 0.9216822385787964, "logits/rejected": 0.8851982355117798, "logps/chosen": -2.0073883533477783, "logps/rejected": -4.495123863220215, "loss": 0.5711, "nll_loss": 0.5537897944450378, "rewards/accuracies": 1.0, "rewards/chosen": -0.2007388472557068, "rewards/margins": 0.24877353012561798, "rewards/rejected": -0.44951239228248596, "step": 6348 }, { "epoch": 17.38261464750171, "grad_norm": 9.866841316223145, "learning_rate": 1.3027397260273972e-07, "log_odds_chosen": 2.05240535736084, "log_odds_ratio": -0.3794082999229431, "logits/chosen": 1.44944167137146, "logits/rejected": 1.4264154434204102, "logps/chosen": -1.978560209274292, "logps/rejected": -3.8312020301818848, "loss": 0.5456, "nll_loss": 0.5077053308486938, "rewards/accuracies": 0.875, "rewards/chosen": -0.19785600900650024, "rewards/margins": 0.18526417016983032, "rewards/rejected": -0.38312020897865295, "step": 6349 }, { "epoch": 17.385352498288842, "grad_norm": 8.810169219970703, "learning_rate": 1.3013698630136985e-07, "log_odds_chosen": 1.9863492250442505, "log_odds_ratio": -0.4105185866355896, "logits/chosen": 1.0357264280319214, "logits/rejected": 1.042327642440796, "logps/chosen": -1.9447911977767944, "logps/rejected": -3.7763233184814453, "loss": 0.551, "nll_loss": 0.5099269151687622, "rewards/accuracies": 0.875, "rewards/chosen": -0.19447912275791168, "rewards/margins": 0.1831531822681427, "rewards/rejected": -0.3776322901248932, "step": 6350 }, { "epoch": 17.388090349075977, "grad_norm": 3.614419460296631, "learning_rate": 1.3e-07, "log_odds_chosen": 3.4961631298065186, "log_odds_ratio": -0.24458962678909302, "logits/chosen": 1.0626096725463867, "logits/rejected": 0.989939272403717, "logps/chosen": -1.470447063446045, "logps/rejected": -4.539194107055664, "loss": 0.4484, "nll_loss": 0.42398929595947266, "rewards/accuracies": 0.875, "rewards/chosen": -0.14704471826553345, "rewards/margins": 0.30687469244003296, "rewards/rejected": -0.4539194107055664, "step": 6351 }, { "epoch": 17.39082819986311, "grad_norm": 6.305164337158203, "learning_rate": 1.2986301369863013e-07, "log_odds_chosen": 2.2065343856811523, "log_odds_ratio": -0.32297417521476746, "logits/chosen": 1.278782606124878, "logits/rejected": 1.204610824584961, "logps/chosen": -1.8916192054748535, "logps/rejected": -3.8596668243408203, "loss": 0.6139, "nll_loss": 0.5815972089767456, "rewards/accuracies": 0.875, "rewards/chosen": -0.18916192650794983, "rewards/margins": 0.19680476188659668, "rewards/rejected": -0.3859666585922241, "step": 6352 }, { "epoch": 17.39356605065024, "grad_norm": 3.6522364616394043, "learning_rate": 1.2972602739726028e-07, "log_odds_chosen": 1.9696416854858398, "log_odds_ratio": -0.1911238133907318, "logits/chosen": 1.0092424154281616, "logits/rejected": 0.8868973851203918, "logps/chosen": -1.3563015460968018, "logps/rejected": -3.081144332885742, "loss": 0.5243, "nll_loss": 0.5051411390304565, "rewards/accuracies": 1.0, "rewards/chosen": -0.13563016057014465, "rewards/margins": 0.17248427867889404, "rewards/rejected": -0.3081144392490387, "step": 6353 }, { "epoch": 17.39630390143737, "grad_norm": 3.6985480785369873, "learning_rate": 1.2958904109589043e-07, "log_odds_chosen": 1.7188338041305542, "log_odds_ratio": -0.27323025465011597, "logits/chosen": 1.1467105150222778, "logits/rejected": 1.126880168914795, "logps/chosen": -1.7648078203201294, "logps/rejected": -3.3572592735290527, "loss": 0.4372, "nll_loss": 0.40986695885658264, "rewards/accuracies": 1.0, "rewards/chosen": -0.17648079991340637, "rewards/margins": 0.15924513339996338, "rewards/rejected": -0.33572593331336975, "step": 6354 }, { "epoch": 17.399041752224505, "grad_norm": 3.2040369510650635, "learning_rate": 1.2945205479452053e-07, "log_odds_chosen": 3.458584785461426, "log_odds_ratio": -0.13789215683937073, "logits/chosen": 1.0074350833892822, "logits/rejected": 0.9402532577514648, "logps/chosen": -1.740660548210144, "logps/rejected": -5.002658367156982, "loss": 0.5487, "nll_loss": 0.534900426864624, "rewards/accuracies": 1.0, "rewards/chosen": -0.17406606674194336, "rewards/margins": 0.3261997401714325, "rewards/rejected": -0.5002658367156982, "step": 6355 }, { "epoch": 17.401779603011637, "grad_norm": 3.4935829639434814, "learning_rate": 1.2931506849315068e-07, "log_odds_chosen": 3.3277249336242676, "log_odds_ratio": -0.11641658842563629, "logits/chosen": 1.2277029752731323, "logits/rejected": 1.142645239830017, "logps/chosen": -2.105611562728882, "logps/rejected": -5.287513732910156, "loss": 0.553, "nll_loss": 0.5413914918899536, "rewards/accuracies": 1.0, "rewards/chosen": -0.21056115627288818, "rewards/margins": 0.31819018721580505, "rewards/rejected": -0.5287513732910156, "step": 6356 }, { "epoch": 17.404517453798768, "grad_norm": 3.43692946434021, "learning_rate": 1.291780821917808e-07, "log_odds_chosen": 4.131860733032227, "log_odds_ratio": -0.08598122745752335, "logits/chosen": 1.4326214790344238, "logits/rejected": 1.4407628774642944, "logps/chosen": -1.8457658290863037, "logps/rejected": -5.756979942321777, "loss": 0.5534, "nll_loss": 0.5447705984115601, "rewards/accuracies": 1.0, "rewards/chosen": -0.18457657098770142, "rewards/margins": 0.39112141728401184, "rewards/rejected": -0.5756980180740356, "step": 6357 }, { "epoch": 17.4072553045859, "grad_norm": 3.5958399772644043, "learning_rate": 1.2904109589041096e-07, "log_odds_chosen": 3.676431894302368, "log_odds_ratio": -0.1279366910457611, "logits/chosen": 0.9791145920753479, "logits/rejected": 1.0014066696166992, "logps/chosen": -1.8591796159744263, "logps/rejected": -5.3685808181762695, "loss": 0.704, "nll_loss": 0.6912145614624023, "rewards/accuracies": 1.0, "rewards/chosen": -0.1859179586172104, "rewards/margins": 0.35094010829925537, "rewards/rejected": -0.536858081817627, "step": 6358 }, { "epoch": 17.409993155373034, "grad_norm": 3.776073694229126, "learning_rate": 1.2890410958904108e-07, "log_odds_chosen": 2.9144983291625977, "log_odds_ratio": -0.20676648616790771, "logits/chosen": 1.4010391235351562, "logits/rejected": 1.2955043315887451, "logps/chosen": -1.6526203155517578, "logps/rejected": -4.401959419250488, "loss": 0.4865, "nll_loss": 0.4658495783805847, "rewards/accuracies": 1.0, "rewards/chosen": -0.16526202857494354, "rewards/margins": 0.27493393421173096, "rewards/rejected": -0.4401959776878357, "step": 6359 }, { "epoch": 17.412731006160165, "grad_norm": 3.7761592864990234, "learning_rate": 1.2876712328767124e-07, "log_odds_chosen": 3.344674587249756, "log_odds_ratio": -0.22267284989356995, "logits/chosen": 0.831297755241394, "logits/rejected": 0.841820240020752, "logps/chosen": -1.6194534301757812, "logps/rejected": -4.7886810302734375, "loss": 0.4749, "nll_loss": 0.4526156783103943, "rewards/accuracies": 1.0, "rewards/chosen": -0.1619453728199005, "rewards/margins": 0.31692278385162354, "rewards/rejected": -0.47886812686920166, "step": 6360 }, { "epoch": 17.415468856947296, "grad_norm": 4.057010173797607, "learning_rate": 1.2863013698630136e-07, "log_odds_chosen": 3.794802665710449, "log_odds_ratio": -0.17169015109539032, "logits/chosen": 1.3802244663238525, "logits/rejected": 1.431671380996704, "logps/chosen": -2.4211339950561523, "logps/rejected": -6.097687721252441, "loss": 0.6039, "nll_loss": 0.5867124795913696, "rewards/accuracies": 1.0, "rewards/chosen": -0.242113396525383, "rewards/margins": 0.3676553964614868, "rewards/rejected": -0.609768807888031, "step": 6361 }, { "epoch": 17.418206707734427, "grad_norm": 4.079263210296631, "learning_rate": 1.284931506849315e-07, "log_odds_chosen": 2.6169593334198, "log_odds_ratio": -0.30468103289604187, "logits/chosen": 0.907831609249115, "logits/rejected": 0.8576637506484985, "logps/chosen": -1.8702946901321411, "logps/rejected": -4.293910980224609, "loss": 0.5551, "nll_loss": 0.524660587310791, "rewards/accuracies": 0.875, "rewards/chosen": -0.18702948093414307, "rewards/margins": 0.2423616349697113, "rewards/rejected": -0.42939114570617676, "step": 6362 }, { "epoch": 17.420944558521562, "grad_norm": 5.337035655975342, "learning_rate": 1.2835616438356164e-07, "log_odds_chosen": 3.969573974609375, "log_odds_ratio": -0.10029465705156326, "logits/chosen": 1.2105355262756348, "logits/rejected": 1.2276270389556885, "logps/chosen": -2.651465892791748, "logps/rejected": -6.440727710723877, "loss": 0.633, "nll_loss": 0.6230140328407288, "rewards/accuracies": 1.0, "rewards/chosen": -0.2651466131210327, "rewards/margins": 0.378926157951355, "rewards/rejected": -0.6440727710723877, "step": 6363 }, { "epoch": 17.423682409308693, "grad_norm": 9.127528190612793, "learning_rate": 1.2821917808219176e-07, "log_odds_chosen": 2.235126495361328, "log_odds_ratio": -0.5626000165939331, "logits/chosen": 0.864173412322998, "logits/rejected": 0.8276915550231934, "logps/chosen": -3.063887596130371, "logps/rejected": -5.143141746520996, "loss": 0.5918, "nll_loss": 0.5355074405670166, "rewards/accuracies": 0.875, "rewards/chosen": -0.306388795375824, "rewards/margins": 0.20792540907859802, "rewards/rejected": -0.5143141746520996, "step": 6364 }, { "epoch": 17.426420260095824, "grad_norm": 3.6623575687408447, "learning_rate": 1.2808219178082192e-07, "log_odds_chosen": 2.878899335861206, "log_odds_ratio": -0.1794590950012207, "logits/chosen": 1.1149182319641113, "logits/rejected": 1.1326720714569092, "logps/chosen": -2.1626598834991455, "logps/rejected": -4.930247783660889, "loss": 0.5623, "nll_loss": 0.5443412065505981, "rewards/accuracies": 1.0, "rewards/chosen": -0.2162659764289856, "rewards/margins": 0.2767588198184967, "rewards/rejected": -0.4930247962474823, "step": 6365 }, { "epoch": 17.429158110882955, "grad_norm": 4.9602508544921875, "learning_rate": 1.2794520547945207e-07, "log_odds_chosen": 2.087608575820923, "log_odds_ratio": -0.3025810122489929, "logits/chosen": 1.0874695777893066, "logits/rejected": 1.0146507024765015, "logps/chosen": -1.8711438179016113, "logps/rejected": -3.773855447769165, "loss": 0.4661, "nll_loss": 0.4358835220336914, "rewards/accuracies": 0.75, "rewards/chosen": -0.1871143877506256, "rewards/margins": 0.19027116894721985, "rewards/rejected": -0.37738552689552307, "step": 6366 }, { "epoch": 17.43189596167009, "grad_norm": 3.7003173828125, "learning_rate": 1.278082191780822e-07, "log_odds_chosen": 3.14619517326355, "log_odds_ratio": -0.21777498722076416, "logits/chosen": 1.1723915338516235, "logits/rejected": 1.181012511253357, "logps/chosen": -1.6824631690979004, "logps/rejected": -4.651108264923096, "loss": 0.6637, "nll_loss": 0.6419723033905029, "rewards/accuracies": 1.0, "rewards/chosen": -0.168246328830719, "rewards/margins": 0.2968645393848419, "rewards/rejected": -0.4651108384132385, "step": 6367 }, { "epoch": 17.43463381245722, "grad_norm": 5.894155502319336, "learning_rate": 1.2767123287671232e-07, "log_odds_chosen": 1.2502875328063965, "log_odds_ratio": -0.6721570491790771, "logits/chosen": 0.899786651134491, "logits/rejected": 0.9898838996887207, "logps/chosen": -2.1942927837371826, "logps/rejected": -3.2907652854919434, "loss": 0.5832, "nll_loss": 0.5159425735473633, "rewards/accuracies": 0.75, "rewards/chosen": -0.21942926943302155, "rewards/margins": 0.1096472442150116, "rewards/rejected": -0.32907652854919434, "step": 6368 }, { "epoch": 17.437371663244353, "grad_norm": 4.773419380187988, "learning_rate": 1.2753424657534245e-07, "log_odds_chosen": 2.326537609100342, "log_odds_ratio": -0.26884061098098755, "logits/chosen": 1.0649842023849487, "logits/rejected": 1.0033477544784546, "logps/chosen": -1.6548646688461304, "logps/rejected": -3.7974650859832764, "loss": 0.4988, "nll_loss": 0.47190558910369873, "rewards/accuracies": 0.875, "rewards/chosen": -0.16548646986484528, "rewards/margins": 0.2142600417137146, "rewards/rejected": -0.37974652647972107, "step": 6369 }, { "epoch": 17.440109514031484, "grad_norm": 4.8575263023376465, "learning_rate": 1.273972602739726e-07, "log_odds_chosen": 1.986022710800171, "log_odds_ratio": -0.31811320781707764, "logits/chosen": 1.059614658355713, "logits/rejected": 1.0653034448623657, "logps/chosen": -2.1449694633483887, "logps/rejected": -4.061673641204834, "loss": 0.4999, "nll_loss": 0.46809083223342896, "rewards/accuracies": 0.75, "rewards/chosen": -0.2144969403743744, "rewards/margins": 0.19167043268680573, "rewards/rejected": -0.4061673581600189, "step": 6370 }, { "epoch": 17.44284736481862, "grad_norm": 3.8209924697875977, "learning_rate": 1.2726027397260272e-07, "log_odds_chosen": 5.09128475189209, "log_odds_ratio": -0.045939505100250244, "logits/chosen": 1.3138413429260254, "logits/rejected": 1.370954990386963, "logps/chosen": -1.9753834009170532, "logps/rejected": -6.740762710571289, "loss": 0.5437, "nll_loss": 0.5390810370445251, "rewards/accuracies": 1.0, "rewards/chosen": -0.1975383311510086, "rewards/margins": 0.4765379726886749, "rewards/rejected": -0.6740763187408447, "step": 6371 }, { "epoch": 17.44558521560575, "grad_norm": 3.5673828125, "learning_rate": 1.2712328767123288e-07, "log_odds_chosen": 3.9193954467773438, "log_odds_ratio": -0.2213549017906189, "logits/chosen": 1.113036870956421, "logits/rejected": 1.1237173080444336, "logps/chosen": -1.6553270816802979, "logps/rejected": -5.325754165649414, "loss": 0.5127, "nll_loss": 0.4905868172645569, "rewards/accuracies": 1.0, "rewards/chosen": -0.16553270816802979, "rewards/margins": 0.3670427203178406, "rewards/rejected": -0.5325754284858704, "step": 6372 }, { "epoch": 17.44832306639288, "grad_norm": 3.9871785640716553, "learning_rate": 1.2698630136986303e-07, "log_odds_chosen": 2.7491161823272705, "log_odds_ratio": -0.20343680679798126, "logits/chosen": 1.344234824180603, "logits/rejected": 1.3438639640808105, "logps/chosen": -1.2982314825057983, "logps/rejected": -3.7958405017852783, "loss": 0.3917, "nll_loss": 0.37134408950805664, "rewards/accuracies": 1.0, "rewards/chosen": -0.12982314825057983, "rewards/margins": 0.24976089596748352, "rewards/rejected": -0.37958404421806335, "step": 6373 }, { "epoch": 17.451060917180012, "grad_norm": 3.4589850902557373, "learning_rate": 1.2684931506849315e-07, "log_odds_chosen": 3.2478153705596924, "log_odds_ratio": -0.26292580366134644, "logits/chosen": 1.1228724718093872, "logits/rejected": 1.1406474113464355, "logps/chosen": -1.5602245330810547, "logps/rejected": -4.626699447631836, "loss": 0.58, "nll_loss": 0.5536686182022095, "rewards/accuracies": 0.875, "rewards/chosen": -0.15602244436740875, "rewards/margins": 0.30664756894111633, "rewards/rejected": -0.4626699984073639, "step": 6374 }, { "epoch": 17.453798767967147, "grad_norm": 3.734966993331909, "learning_rate": 1.2671232876712328e-07, "log_odds_chosen": 2.4432945251464844, "log_odds_ratio": -0.15164443850517273, "logits/chosen": 1.1590135097503662, "logits/rejected": 1.0689314603805542, "logps/chosen": -1.5921136140823364, "logps/rejected": -3.8207101821899414, "loss": 0.4351, "nll_loss": 0.41997820138931274, "rewards/accuracies": 1.0, "rewards/chosen": -0.15921136736869812, "rewards/margins": 0.22285965085029602, "rewards/rejected": -0.38207101821899414, "step": 6375 }, { "epoch": 17.456536618754278, "grad_norm": 4.249450206756592, "learning_rate": 1.265753424657534e-07, "log_odds_chosen": 1.63077974319458, "log_odds_ratio": -0.35133862495422363, "logits/chosen": 1.1115175485610962, "logits/rejected": 1.0880564451217651, "logps/chosen": -1.7150263786315918, "logps/rejected": -3.256653070449829, "loss": 0.4602, "nll_loss": 0.4251008927822113, "rewards/accuracies": 0.75, "rewards/chosen": -0.17150264978408813, "rewards/margins": 0.1541626751422882, "rewards/rejected": -0.32566535472869873, "step": 6376 }, { "epoch": 17.45927446954141, "grad_norm": 5.114913463592529, "learning_rate": 1.2643835616438356e-07, "log_odds_chosen": 3.6624066829681396, "log_odds_ratio": -0.15696117281913757, "logits/chosen": 0.8236174583435059, "logits/rejected": 0.7483605742454529, "logps/chosen": -1.9032487869262695, "logps/rejected": -5.339044570922852, "loss": 0.4775, "nll_loss": 0.4618184268474579, "rewards/accuracies": 1.0, "rewards/chosen": -0.19032487273216248, "rewards/margins": 0.34357962012290955, "rewards/rejected": -0.533904492855072, "step": 6377 }, { "epoch": 17.462012320328544, "grad_norm": 3.2689452171325684, "learning_rate": 1.2630136986301368e-07, "log_odds_chosen": 3.5274739265441895, "log_odds_ratio": -0.1314360648393631, "logits/chosen": 0.8979687690734863, "logits/rejected": 0.8425165414810181, "logps/chosen": -1.7660399675369263, "logps/rejected": -5.065408706665039, "loss": 0.5274, "nll_loss": 0.5143017768859863, "rewards/accuracies": 1.0, "rewards/chosen": -0.1766040027141571, "rewards/margins": 0.3299369215965271, "rewards/rejected": -0.5065408945083618, "step": 6378 }, { "epoch": 17.464750171115675, "grad_norm": 3.499173402786255, "learning_rate": 1.2616438356164383e-07, "log_odds_chosen": 3.5629873275756836, "log_odds_ratio": -0.1410309225320816, "logits/chosen": 1.3106744289398193, "logits/rejected": 1.3060890436172485, "logps/chosen": -1.4957644939422607, "logps/rejected": -4.847247123718262, "loss": 0.4666, "nll_loss": 0.4524625837802887, "rewards/accuracies": 1.0, "rewards/chosen": -0.14957645535469055, "rewards/margins": 0.33514824509620667, "rewards/rejected": -0.48472467064857483, "step": 6379 }, { "epoch": 17.467488021902806, "grad_norm": 5.886794567108154, "learning_rate": 1.2602739726027399e-07, "log_odds_chosen": 1.5487014055252075, "log_odds_ratio": -0.5122039318084717, "logits/chosen": 1.2641983032226562, "logits/rejected": 1.266251802444458, "logps/chosen": -2.5250167846679688, "logps/rejected": -3.98201322555542, "loss": 0.5924, "nll_loss": 0.5411555767059326, "rewards/accuracies": 0.75, "rewards/chosen": -0.2525016963481903, "rewards/margins": 0.1456996500492096, "rewards/rejected": -0.3982013165950775, "step": 6380 }, { "epoch": 17.470225872689937, "grad_norm": 4.2811808586120605, "learning_rate": 1.2589041095890409e-07, "log_odds_chosen": 3.0452656745910645, "log_odds_ratio": -0.18063008785247803, "logits/chosen": 1.0451503992080688, "logits/rejected": 1.0856728553771973, "logps/chosen": -1.8942352533340454, "logps/rejected": -4.772844314575195, "loss": 0.4826, "nll_loss": 0.4645686149597168, "rewards/accuracies": 1.0, "rewards/chosen": -0.18942353129386902, "rewards/margins": 0.2878609299659729, "rewards/rejected": -0.4772844910621643, "step": 6381 }, { "epoch": 17.472963723477072, "grad_norm": 4.712203502655029, "learning_rate": 1.2575342465753424e-07, "log_odds_chosen": 1.666763424873352, "log_odds_ratio": -0.2718138098716736, "logits/chosen": 1.0257331132888794, "logits/rejected": 1.0304228067398071, "logps/chosen": -2.21862530708313, "logps/rejected": -3.6720519065856934, "loss": 0.5277, "nll_loss": 0.5005590319633484, "rewards/accuracies": 1.0, "rewards/chosen": -0.2218625247478485, "rewards/margins": 0.14534267783164978, "rewards/rejected": -0.3672052025794983, "step": 6382 }, { "epoch": 17.475701574264203, "grad_norm": 3.748013973236084, "learning_rate": 1.2561643835616436e-07, "log_odds_chosen": 1.9295895099639893, "log_odds_ratio": -0.2589648365974426, "logits/chosen": 1.0604194402694702, "logits/rejected": 1.0352669954299927, "logps/chosen": -1.774604082107544, "logps/rejected": -3.514389991760254, "loss": 0.4596, "nll_loss": 0.4336959719657898, "rewards/accuracies": 1.0, "rewards/chosen": -0.17746040225028992, "rewards/margins": 0.17397859692573547, "rewards/rejected": -0.3514389991760254, "step": 6383 }, { "epoch": 17.478439425051334, "grad_norm": 3.786698818206787, "learning_rate": 1.2547945205479452e-07, "log_odds_chosen": 3.5795681476593018, "log_odds_ratio": -0.1817530244588852, "logits/chosen": 1.235652208328247, "logits/rejected": 1.194162130355835, "logps/chosen": -1.7444124221801758, "logps/rejected": -5.1570634841918945, "loss": 0.5117, "nll_loss": 0.49348193407058716, "rewards/accuracies": 1.0, "rewards/chosen": -0.17444124817848206, "rewards/margins": 0.34126511216163635, "rewards/rejected": -0.5157063603401184, "step": 6384 }, { "epoch": 17.481177275838466, "grad_norm": 3.806034803390503, "learning_rate": 1.2534246575342467e-07, "log_odds_chosen": 3.2916908264160156, "log_odds_ratio": -0.16448161005973816, "logits/chosen": 1.4251818656921387, "logits/rejected": 1.3955061435699463, "logps/chosen": -1.1927599906921387, "logps/rejected": -4.153383731842041, "loss": 0.3696, "nll_loss": 0.3531430661678314, "rewards/accuracies": 1.0, "rewards/chosen": -0.1192760020494461, "rewards/margins": 0.2960623502731323, "rewards/rejected": -0.41533833742141724, "step": 6385 }, { "epoch": 17.4839151266256, "grad_norm": 3.970196485519409, "learning_rate": 1.252054794520548e-07, "log_odds_chosen": 1.1379419565200806, "log_odds_ratio": -0.41653972864151, "logits/chosen": 1.489877700805664, "logits/rejected": 1.3752962350845337, "logps/chosen": -1.730576515197754, "logps/rejected": -2.7104947566986084, "loss": 0.4796, "nll_loss": 0.43795716762542725, "rewards/accuracies": 0.75, "rewards/chosen": -0.1730576604604721, "rewards/margins": 0.09799183160066605, "rewards/rejected": -0.27104949951171875, "step": 6386 }, { "epoch": 17.48665297741273, "grad_norm": 3.78066086769104, "learning_rate": 1.2506849315068494e-07, "log_odds_chosen": 3.874084234237671, "log_odds_ratio": -0.16386181116104126, "logits/chosen": 1.0056793689727783, "logits/rejected": 1.0190675258636475, "logps/chosen": -1.8636704683303833, "logps/rejected": -5.572089195251465, "loss": 0.6155, "nll_loss": 0.5990941524505615, "rewards/accuracies": 1.0, "rewards/chosen": -0.18636704981327057, "rewards/margins": 0.3708418607711792, "rewards/rejected": -0.5572088956832886, "step": 6387 }, { "epoch": 17.489390828199863, "grad_norm": 6.824924945831299, "learning_rate": 1.2493150684931507e-07, "log_odds_chosen": 3.0989010334014893, "log_odds_ratio": -0.12581808865070343, "logits/chosen": 1.2092595100402832, "logits/rejected": 1.2406965494155884, "logps/chosen": -2.1383979320526123, "logps/rejected": -5.095071315765381, "loss": 0.6106, "nll_loss": 0.5979750156402588, "rewards/accuracies": 1.0, "rewards/chosen": -0.2138397991657257, "rewards/margins": 0.2956673204898834, "rewards/rejected": -0.5095071196556091, "step": 6388 }, { "epoch": 17.492128678986994, "grad_norm": 3.3181164264678955, "learning_rate": 1.247945205479452e-07, "log_odds_chosen": 3.301208019256592, "log_odds_ratio": -0.1487559676170349, "logits/chosen": 1.2612066268920898, "logits/rejected": 1.285536527633667, "logps/chosen": -1.983167052268982, "logps/rejected": -5.1458635330200195, "loss": 0.5141, "nll_loss": 0.4992128610610962, "rewards/accuracies": 1.0, "rewards/chosen": -0.19831670820713043, "rewards/margins": 0.3162696659564972, "rewards/rejected": -0.514586329460144, "step": 6389 }, { "epoch": 17.49486652977413, "grad_norm": 7.010556697845459, "learning_rate": 1.2465753424657535e-07, "log_odds_chosen": 2.1855645179748535, "log_odds_ratio": -0.2162129133939743, "logits/chosen": 1.0977678298950195, "logits/rejected": 0.978919506072998, "logps/chosen": -2.357447385787964, "logps/rejected": -4.425779342651367, "loss": 0.6758, "nll_loss": 0.6541590690612793, "rewards/accuracies": 1.0, "rewards/chosen": -0.23574472963809967, "rewards/margins": 0.20683324337005615, "rewards/rejected": -0.44257795810699463, "step": 6390 }, { "epoch": 17.49760438056126, "grad_norm": 3.841637372970581, "learning_rate": 1.2452054794520547e-07, "log_odds_chosen": 3.4921984672546387, "log_odds_ratio": -0.17145591974258423, "logits/chosen": 1.0626463890075684, "logits/rejected": 1.0074729919433594, "logps/chosen": -1.8064732551574707, "logps/rejected": -5.123714447021484, "loss": 0.5017, "nll_loss": 0.4845632016658783, "rewards/accuracies": 1.0, "rewards/chosen": -0.18064731359481812, "rewards/margins": 0.3317241072654724, "rewards/rejected": -0.5123714208602905, "step": 6391 }, { "epoch": 17.50034223134839, "grad_norm": 3.5492072105407715, "learning_rate": 1.243835616438356e-07, "log_odds_chosen": 1.453901767730713, "log_odds_ratio": -0.2912455201148987, "logits/chosen": 1.2317824363708496, "logits/rejected": 1.2056231498718262, "logps/chosen": -1.4987974166870117, "logps/rejected": -2.7602005004882812, "loss": 0.3756, "nll_loss": 0.3464471101760864, "rewards/accuracies": 0.875, "rewards/chosen": -0.14987975358963013, "rewards/margins": 0.1261403113603592, "rewards/rejected": -0.2760200500488281, "step": 6392 }, { "epoch": 17.503080082135522, "grad_norm": 4.175602912902832, "learning_rate": 1.2424657534246575e-07, "log_odds_chosen": 2.6457438468933105, "log_odds_ratio": -0.20017468929290771, "logits/chosen": 1.2391892671585083, "logits/rejected": 1.2382527589797974, "logps/chosen": -2.404181480407715, "logps/rejected": -4.935752868652344, "loss": 0.5067, "nll_loss": 0.48670440912246704, "rewards/accuracies": 1.0, "rewards/chosen": -0.24041813611984253, "rewards/margins": 0.2531571388244629, "rewards/rejected": -0.4935752749443054, "step": 6393 }, { "epoch": 17.505817932922657, "grad_norm": 4.270005702972412, "learning_rate": 1.241095890410959e-07, "log_odds_chosen": 2.949375629425049, "log_odds_ratio": -0.1750842183828354, "logits/chosen": 1.0743409395217896, "logits/rejected": 1.0829412937164307, "logps/chosen": -1.9831202030181885, "logps/rejected": -4.74346399307251, "loss": 0.5364, "nll_loss": 0.518842875957489, "rewards/accuracies": 1.0, "rewards/chosen": -0.19831201434135437, "rewards/margins": 0.2760343551635742, "rewards/rejected": -0.474346399307251, "step": 6394 }, { "epoch": 17.508555783709788, "grad_norm": 3.884948253631592, "learning_rate": 1.2397260273972603e-07, "log_odds_chosen": 1.9681251049041748, "log_odds_ratio": -0.19959980249404907, "logits/chosen": 1.267884373664856, "logits/rejected": 1.2050915956497192, "logps/chosen": -1.2412755489349365, "logps/rejected": -2.882930040359497, "loss": 0.3789, "nll_loss": 0.358963280916214, "rewards/accuracies": 1.0, "rewards/chosen": -0.12412756681442261, "rewards/margins": 0.1641654372215271, "rewards/rejected": -0.2882930040359497, "step": 6395 }, { "epoch": 17.51129363449692, "grad_norm": 3.6079659461975098, "learning_rate": 1.2383561643835615e-07, "log_odds_chosen": 4.230658531188965, "log_odds_ratio": -0.12391430139541626, "logits/chosen": 0.7000643610954285, "logits/rejected": 0.776958703994751, "logps/chosen": -1.9916126728057861, "logps/rejected": -6.059475898742676, "loss": 0.7255, "nll_loss": 0.7131345868110657, "rewards/accuracies": 0.875, "rewards/chosen": -0.19916129112243652, "rewards/margins": 0.40678635239601135, "rewards/rejected": -0.6059475541114807, "step": 6396 }, { "epoch": 17.51403148528405, "grad_norm": 4.2550506591796875, "learning_rate": 1.236986301369863e-07, "log_odds_chosen": 3.917675018310547, "log_odds_ratio": -0.08574438095092773, "logits/chosen": 1.098858118057251, "logits/rejected": 1.017795443534851, "logps/chosen": -2.2693264484405518, "logps/rejected": -5.969200611114502, "loss": 0.6982, "nll_loss": 0.6896657347679138, "rewards/accuracies": 1.0, "rewards/chosen": -0.22693264484405518, "rewards/margins": 0.36998745799064636, "rewards/rejected": -0.5969200134277344, "step": 6397 }, { "epoch": 17.516769336071185, "grad_norm": 5.733023166656494, "learning_rate": 1.2356164383561643e-07, "log_odds_chosen": 2.480611801147461, "log_odds_ratio": -0.23092542588710785, "logits/chosen": 1.098076581954956, "logits/rejected": 1.0947641134262085, "logps/chosen": -1.9290783405303955, "logps/rejected": -4.1535162925720215, "loss": 0.4836, "nll_loss": 0.4605099558830261, "rewards/accuracies": 1.0, "rewards/chosen": -0.19290782511234283, "rewards/margins": 0.2224438488483429, "rewards/rejected": -0.41535162925720215, "step": 6398 }, { "epoch": 17.519507186858316, "grad_norm": 3.4002721309661865, "learning_rate": 1.2342465753424656e-07, "log_odds_chosen": 2.4541678428649902, "log_odds_ratio": -0.2078382968902588, "logits/chosen": 1.171829342842102, "logits/rejected": 1.1587047576904297, "logps/chosen": -1.6516042947769165, "logps/rejected": -3.939833402633667, "loss": 0.4795, "nll_loss": 0.45867133140563965, "rewards/accuracies": 1.0, "rewards/chosen": -0.1651604324579239, "rewards/margins": 0.22882291674613953, "rewards/rejected": -0.3939833641052246, "step": 6399 }, { "epoch": 17.522245037645447, "grad_norm": 4.828329086303711, "learning_rate": 1.232876712328767e-07, "log_odds_chosen": 1.8649035692214966, "log_odds_ratio": -0.3936619758605957, "logits/chosen": 0.9139423370361328, "logits/rejected": 0.9018669128417969, "logps/chosen": -1.8840676546096802, "logps/rejected": -3.614640951156616, "loss": 0.5079, "nll_loss": 0.46850770711898804, "rewards/accuracies": 0.75, "rewards/chosen": -0.18840676546096802, "rewards/margins": 0.17305733263492584, "rewards/rejected": -0.36146411299705505, "step": 6400 }, { "epoch": 17.524982888432582, "grad_norm": 4.219155788421631, "learning_rate": 1.2315068493150684e-07, "log_odds_chosen": 3.7888612747192383, "log_odds_ratio": -0.09792427718639374, "logits/chosen": 1.3462296724319458, "logits/rejected": 1.3977478742599487, "logps/chosen": -1.9700539112091064, "logps/rejected": -5.558196544647217, "loss": 0.6461, "nll_loss": 0.6363238096237183, "rewards/accuracies": 1.0, "rewards/chosen": -0.19700539112091064, "rewards/margins": 0.3588142395019531, "rewards/rejected": -0.5558196306228638, "step": 6401 }, { "epoch": 17.527720739219713, "grad_norm": 7.082913398742676, "learning_rate": 1.23013698630137e-07, "log_odds_chosen": 4.123362064361572, "log_odds_ratio": -0.460536390542984, "logits/chosen": 1.083399772644043, "logits/rejected": 1.092674970626831, "logps/chosen": -2.3002545833587646, "logps/rejected": -6.201552391052246, "loss": 0.6575, "nll_loss": 0.6114538908004761, "rewards/accuracies": 0.875, "rewards/chosen": -0.23002545535564423, "rewards/margins": 0.3901297450065613, "rewards/rejected": -0.6201552152633667, "step": 6402 }, { "epoch": 17.530458590006845, "grad_norm": 3.989656925201416, "learning_rate": 1.2287671232876711e-07, "log_odds_chosen": 3.2214951515197754, "log_odds_ratio": -0.35860902070999146, "logits/chosen": 0.9145570993423462, "logits/rejected": 0.9277331829071045, "logps/chosen": -1.9149997234344482, "logps/rejected": -4.927903175354004, "loss": 0.6864, "nll_loss": 0.6505541205406189, "rewards/accuracies": 0.75, "rewards/chosen": -0.1914999783039093, "rewards/margins": 0.3012904226779938, "rewards/rejected": -0.4927904009819031, "step": 6403 }, { "epoch": 17.533196440793976, "grad_norm": 3.536015033721924, "learning_rate": 1.2273972602739727e-07, "log_odds_chosen": 2.4382338523864746, "log_odds_ratio": -0.13324517011642456, "logits/chosen": 1.0109041929244995, "logits/rejected": 1.049670696258545, "logps/chosen": -1.4556264877319336, "logps/rejected": -3.5410518646240234, "loss": 0.4636, "nll_loss": 0.4502818286418915, "rewards/accuracies": 1.0, "rewards/chosen": -0.14556264877319336, "rewards/margins": 0.20854254066944122, "rewards/rejected": -0.3541052043437958, "step": 6404 }, { "epoch": 17.53593429158111, "grad_norm": 3.861204147338867, "learning_rate": 1.226027397260274e-07, "log_odds_chosen": 2.7543094158172607, "log_odds_ratio": -0.15774628520011902, "logits/chosen": 0.8848510384559631, "logits/rejected": 0.8654932975769043, "logps/chosen": -1.8506639003753662, "logps/rejected": -4.433710098266602, "loss": 0.5307, "nll_loss": 0.514933168888092, "rewards/accuracies": 1.0, "rewards/chosen": -0.18506640195846558, "rewards/margins": 0.258304625749588, "rewards/rejected": -0.4433709979057312, "step": 6405 }, { "epoch": 17.53867214236824, "grad_norm": 11.352410316467285, "learning_rate": 1.2246575342465752e-07, "log_odds_chosen": 2.4500913619995117, "log_odds_ratio": -0.3152150809764862, "logits/chosen": 1.2715516090393066, "logits/rejected": 1.299220323562622, "logps/chosen": -2.917497158050537, "logps/rejected": -5.267799377441406, "loss": 0.6332, "nll_loss": 0.6017025709152222, "rewards/accuracies": 0.875, "rewards/chosen": -0.2917497158050537, "rewards/margins": 0.23503021895885468, "rewards/rejected": -0.5267798900604248, "step": 6406 }, { "epoch": 17.541409993155373, "grad_norm": 3.794285297393799, "learning_rate": 1.2232876712328767e-07, "log_odds_chosen": 2.155700922012329, "log_odds_ratio": -0.17749154567718506, "logits/chosen": 0.9500089287757874, "logits/rejected": 0.9344651699066162, "logps/chosen": -2.126033306121826, "logps/rejected": -4.111349105834961, "loss": 0.6195, "nll_loss": 0.6017487049102783, "rewards/accuracies": 1.0, "rewards/chosen": -0.2126033455133438, "rewards/margins": 0.1985315978527069, "rewards/rejected": -0.4111349284648895, "step": 6407 }, { "epoch": 17.544147843942504, "grad_norm": 8.951386451721191, "learning_rate": 1.221917808219178e-07, "log_odds_chosen": 1.294980764389038, "log_odds_ratio": -1.000089168548584, "logits/chosen": 0.8931159377098083, "logits/rejected": 0.8516647815704346, "logps/chosen": -3.1291728019714355, "logps/rejected": -4.333714485168457, "loss": 0.6294, "nll_loss": 0.529413640499115, "rewards/accuracies": 0.625, "rewards/chosen": -0.3129172921180725, "rewards/margins": 0.12045414745807648, "rewards/rejected": -0.43337148427963257, "step": 6408 }, { "epoch": 17.54688569472964, "grad_norm": 3.6261563301086426, "learning_rate": 1.2205479452054795e-07, "log_odds_chosen": 3.3459901809692383, "log_odds_ratio": -0.30466020107269287, "logits/chosen": 1.2772027254104614, "logits/rejected": 1.2776223421096802, "logps/chosen": -1.7008610963821411, "logps/rejected": -4.93349552154541, "loss": 0.5012, "nll_loss": 0.47075721621513367, "rewards/accuracies": 0.875, "rewards/chosen": -0.1700861155986786, "rewards/margins": 0.3232634663581848, "rewards/rejected": -0.493349552154541, "step": 6409 }, { "epoch": 17.54962354551677, "grad_norm": 11.597816467285156, "learning_rate": 1.2191780821917807e-07, "log_odds_chosen": 1.919189453125, "log_odds_ratio": -0.41918841004371643, "logits/chosen": 1.293165683746338, "logits/rejected": 1.257304072380066, "logps/chosen": -2.3669581413269043, "logps/rejected": -4.163928031921387, "loss": 0.6144, "nll_loss": 0.5725299119949341, "rewards/accuracies": 0.875, "rewards/chosen": -0.23669582605361938, "rewards/margins": 0.1796969771385193, "rewards/rejected": -0.41639280319213867, "step": 6410 }, { "epoch": 17.5523613963039, "grad_norm": 6.422168731689453, "learning_rate": 1.217808219178082e-07, "log_odds_chosen": 2.3359310626983643, "log_odds_ratio": -0.2638101577758789, "logits/chosen": 1.4428720474243164, "logits/rejected": 1.4202792644500732, "logps/chosen": -2.297976493835449, "logps/rejected": -4.432254791259766, "loss": 0.5204, "nll_loss": 0.4940569996833801, "rewards/accuracies": 0.875, "rewards/chosen": -0.22979766130447388, "rewards/margins": 0.21342778205871582, "rewards/rejected": -0.4432254433631897, "step": 6411 }, { "epoch": 17.555099247091032, "grad_norm": 5.304293632507324, "learning_rate": 1.2164383561643835e-07, "log_odds_chosen": 2.6704185009002686, "log_odds_ratio": -0.1625816822052002, "logits/chosen": 1.2816818952560425, "logits/rejected": 1.2848209142684937, "logps/chosen": -2.128628969192505, "logps/rejected": -4.591707229614258, "loss": 0.5102, "nll_loss": 0.49392926692962646, "rewards/accuracies": 1.0, "rewards/chosen": -0.21286289393901825, "rewards/margins": 0.246307834982872, "rewards/rejected": -0.45917072892189026, "step": 6412 }, { "epoch": 17.557837097878167, "grad_norm": 6.163503646850586, "learning_rate": 1.215068493150685e-07, "log_odds_chosen": 1.7214391231536865, "log_odds_ratio": -0.2540701627731323, "logits/chosen": 1.2509998083114624, "logits/rejected": 1.2073304653167725, "logps/chosen": -2.1193480491638184, "logps/rejected": -3.693061113357544, "loss": 0.5574, "nll_loss": 0.531974732875824, "rewards/accuracies": 1.0, "rewards/chosen": -0.21193480491638184, "rewards/margins": 0.15737134218215942, "rewards/rejected": -0.36930614709854126, "step": 6413 }, { "epoch": 17.560574948665298, "grad_norm": 3.752497911453247, "learning_rate": 1.2136986301369863e-07, "log_odds_chosen": 4.67992639541626, "log_odds_ratio": -0.17666968703269958, "logits/chosen": 0.9795989990234375, "logits/rejected": 1.003127098083496, "logps/chosen": -2.0321097373962402, "logps/rejected": -6.465464115142822, "loss": 0.6111, "nll_loss": 0.593464732170105, "rewards/accuracies": 0.875, "rewards/chosen": -0.2032109647989273, "rewards/margins": 0.4433354437351227, "rewards/rejected": -0.6465463638305664, "step": 6414 }, { "epoch": 17.56331279945243, "grad_norm": 3.9049880504608154, "learning_rate": 1.2123287671232875e-07, "log_odds_chosen": 2.9943785667419434, "log_odds_ratio": -0.16784334182739258, "logits/chosen": 0.9937404990196228, "logits/rejected": 1.0387827157974243, "logps/chosen": -2.074819564819336, "logps/rejected": -4.925542831420898, "loss": 0.5805, "nll_loss": 0.5637549161911011, "rewards/accuracies": 1.0, "rewards/chosen": -0.2074819803237915, "rewards/margins": 0.28507235646247864, "rewards/rejected": -0.49255430698394775, "step": 6415 }, { "epoch": 17.56605065023956, "grad_norm": 5.1052093505859375, "learning_rate": 1.210958904109589e-07, "log_odds_chosen": 1.7961652278900146, "log_odds_ratio": -0.36308038234710693, "logits/chosen": 0.9529353976249695, "logits/rejected": 0.8805729746818542, "logps/chosen": -2.121525764465332, "logps/rejected": -3.768601417541504, "loss": 0.5889, "nll_loss": 0.5525469779968262, "rewards/accuracies": 0.875, "rewards/chosen": -0.21215258538722992, "rewards/margins": 0.16470755636692047, "rewards/rejected": -0.3768601417541504, "step": 6416 }, { "epoch": 17.568788501026695, "grad_norm": 3.651517391204834, "learning_rate": 1.2095890410958903e-07, "log_odds_chosen": 2.233306646347046, "log_odds_ratio": -0.20337733626365662, "logits/chosen": 1.2842254638671875, "logits/rejected": 1.3135693073272705, "logps/chosen": -1.7080178260803223, "logps/rejected": -3.763725757598877, "loss": 0.4857, "nll_loss": 0.4653513431549072, "rewards/accuracies": 1.0, "rewards/chosen": -0.1708017885684967, "rewards/margins": 0.20557080209255219, "rewards/rejected": -0.3763726055622101, "step": 6417 }, { "epoch": 17.571526351813826, "grad_norm": 3.3128926753997803, "learning_rate": 1.2082191780821916e-07, "log_odds_chosen": 2.2485456466674805, "log_odds_ratio": -0.15955401957035065, "logits/chosen": 0.9157711267471313, "logits/rejected": 0.817060112953186, "logps/chosen": -1.3198895454406738, "logps/rejected": -3.3070552349090576, "loss": 0.4746, "nll_loss": 0.45865121483802795, "rewards/accuracies": 1.0, "rewards/chosen": -0.13198894262313843, "rewards/margins": 0.19871659576892853, "rewards/rejected": -0.33070552349090576, "step": 6418 }, { "epoch": 17.574264202600958, "grad_norm": 3.4524669647216797, "learning_rate": 1.206849315068493e-07, "log_odds_chosen": 2.7618865966796875, "log_odds_ratio": -0.20471300184726715, "logits/chosen": 1.0243213176727295, "logits/rejected": 1.0092558860778809, "logps/chosen": -1.8794593811035156, "logps/rejected": -4.521390914916992, "loss": 0.4894, "nll_loss": 0.46897628903388977, "rewards/accuracies": 0.875, "rewards/chosen": -0.18794594705104828, "rewards/margins": 0.2641931474208832, "rewards/rejected": -0.45213907957077026, "step": 6419 }, { "epoch": 17.57700205338809, "grad_norm": 3.2285678386688232, "learning_rate": 1.2054794520547946e-07, "log_odds_chosen": 3.234673023223877, "log_odds_ratio": -0.20630262792110443, "logits/chosen": 0.9038516283035278, "logits/rejected": 0.9184706211090088, "logps/chosen": -1.7023475170135498, "logps/rejected": -4.7411956787109375, "loss": 0.4902, "nll_loss": 0.4695911407470703, "rewards/accuracies": 1.0, "rewards/chosen": -0.1702347695827484, "rewards/margins": 0.3038848638534546, "rewards/rejected": -0.474119633436203, "step": 6420 }, { "epoch": 17.579739904175224, "grad_norm": 3.0472443103790283, "learning_rate": 1.2041095890410959e-07, "log_odds_chosen": 2.9670424461364746, "log_odds_ratio": -0.1711314618587494, "logits/chosen": 1.3166258335113525, "logits/rejected": 1.2900362014770508, "logps/chosen": -1.3972584009170532, "logps/rejected": -3.9862124919891357, "loss": 0.4611, "nll_loss": 0.4439737796783447, "rewards/accuracies": 0.875, "rewards/chosen": -0.13972583413124084, "rewards/margins": 0.2588954269886017, "rewards/rejected": -0.39862126111984253, "step": 6421 }, { "epoch": 17.582477754962355, "grad_norm": 4.196135520935059, "learning_rate": 1.202739726027397e-07, "log_odds_chosen": 1.9258358478546143, "log_odds_ratio": -0.272249311208725, "logits/chosen": 1.1422017812728882, "logits/rejected": 1.100503921508789, "logps/chosen": -1.4301868677139282, "logps/rejected": -3.1159636974334717, "loss": 0.6027, "nll_loss": 0.5754793286323547, "rewards/accuracies": 1.0, "rewards/chosen": -0.1430186927318573, "rewards/margins": 0.16857768595218658, "rewards/rejected": -0.3115963637828827, "step": 6422 }, { "epoch": 17.585215605749486, "grad_norm": 3.7148056030273438, "learning_rate": 1.2013698630136986e-07, "log_odds_chosen": 3.332932710647583, "log_odds_ratio": -0.17398998141288757, "logits/chosen": 0.9097051620483398, "logits/rejected": 0.8487812876701355, "logps/chosen": -1.4177731275558472, "logps/rejected": -4.525364875793457, "loss": 0.4676, "nll_loss": 0.45020484924316406, "rewards/accuracies": 1.0, "rewards/chosen": -0.14177732169628143, "rewards/margins": 0.31075921654701233, "rewards/rejected": -0.45253652334213257, "step": 6423 }, { "epoch": 17.587953456536617, "grad_norm": 4.020582675933838, "learning_rate": 1.2e-07, "log_odds_chosen": 2.535632371902466, "log_odds_ratio": -0.21444368362426758, "logits/chosen": 1.159326434135437, "logits/rejected": 1.1393343210220337, "logps/chosen": -2.159964084625244, "logps/rejected": -4.574800491333008, "loss": 0.46, "nll_loss": 0.43855321407318115, "rewards/accuracies": 1.0, "rewards/chosen": -0.21599644422531128, "rewards/margins": 0.2414836287498474, "rewards/rejected": -0.4574800729751587, "step": 6424 }, { "epoch": 17.590691307323752, "grad_norm": 3.9517600536346436, "learning_rate": 1.1986301369863011e-07, "log_odds_chosen": 3.057391881942749, "log_odds_ratio": -0.13649359345436096, "logits/chosen": 1.2000302076339722, "logits/rejected": 1.233119249343872, "logps/chosen": -1.7231152057647705, "logps/rejected": -4.529988765716553, "loss": 0.4906, "nll_loss": 0.47690504789352417, "rewards/accuracies": 1.0, "rewards/chosen": -0.17231151461601257, "rewards/margins": 0.2806873917579651, "rewards/rejected": -0.4529988765716553, "step": 6425 }, { "epoch": 17.593429158110883, "grad_norm": 3.969658851623535, "learning_rate": 1.1972602739726027e-07, "log_odds_chosen": 1.3446910381317139, "log_odds_ratio": -0.31348681449890137, "logits/chosen": 1.096893072128296, "logits/rejected": 1.026545763015747, "logps/chosen": -1.5670549869537354, "logps/rejected": -2.7343273162841797, "loss": 0.4355, "nll_loss": 0.4041113257408142, "rewards/accuracies": 1.0, "rewards/chosen": -0.15670549869537354, "rewards/margins": 0.11672723293304443, "rewards/rejected": -0.27343273162841797, "step": 6426 }, { "epoch": 17.596167008898014, "grad_norm": 7.284890174865723, "learning_rate": 1.1958904109589042e-07, "log_odds_chosen": 2.688382148742676, "log_odds_ratio": -0.5596777200698853, "logits/chosen": 1.0766831636428833, "logits/rejected": 1.0852549076080322, "logps/chosen": -1.7933292388916016, "logps/rejected": -4.29844856262207, "loss": 0.7212, "nll_loss": 0.6652526259422302, "rewards/accuracies": 0.75, "rewards/chosen": -0.1793329119682312, "rewards/margins": 0.2505119740962982, "rewards/rejected": -0.4298449158668518, "step": 6427 }, { "epoch": 17.59890485968515, "grad_norm": 4.833075046539307, "learning_rate": 1.1945205479452054e-07, "log_odds_chosen": 5.417034149169922, "log_odds_ratio": -0.11492954194545746, "logits/chosen": 1.232452392578125, "logits/rejected": 1.1820496320724487, "logps/chosen": -1.9096448421478271, "logps/rejected": -7.085201263427734, "loss": 0.6831, "nll_loss": 0.6716293096542358, "rewards/accuracies": 1.0, "rewards/chosen": -0.190964475274086, "rewards/margins": 0.5175557136535645, "rewards/rejected": -0.7085201740264893, "step": 6428 }, { "epoch": 17.60164271047228, "grad_norm": 3.98264741897583, "learning_rate": 1.1931506849315067e-07, "log_odds_chosen": 3.6035983562469482, "log_odds_ratio": -0.11641956120729446, "logits/chosen": 1.347528100013733, "logits/rejected": 1.3411979675292969, "logps/chosen": -1.7425822019577026, "logps/rejected": -5.14951229095459, "loss": 0.4871, "nll_loss": 0.4754147529602051, "rewards/accuracies": 1.0, "rewards/chosen": -0.17425823211669922, "rewards/margins": 0.34069299697875977, "rewards/rejected": -0.514951229095459, "step": 6429 }, { "epoch": 17.60438056125941, "grad_norm": 3.8813259601593018, "learning_rate": 1.1917808219178081e-07, "log_odds_chosen": 1.8640697002410889, "log_odds_ratio": -0.21526291966438293, "logits/chosen": 1.1264578104019165, "logits/rejected": 1.0869450569152832, "logps/chosen": -1.6900038719177246, "logps/rejected": -3.2339932918548584, "loss": 0.4459, "nll_loss": 0.4243628978729248, "rewards/accuracies": 0.875, "rewards/chosen": -0.16900038719177246, "rewards/margins": 0.15439896285533905, "rewards/rejected": -0.3233993351459503, "step": 6430 }, { "epoch": 17.607118412046542, "grad_norm": 3.448453426361084, "learning_rate": 1.1904109589041095e-07, "log_odds_chosen": 2.7248430252075195, "log_odds_ratio": -0.25425243377685547, "logits/chosen": 1.1810497045516968, "logits/rejected": 1.0842365026474, "logps/chosen": -1.5283432006835938, "logps/rejected": -4.031461715698242, "loss": 0.5356, "nll_loss": 0.5101603865623474, "rewards/accuracies": 1.0, "rewards/chosen": -0.15283432602882385, "rewards/margins": 0.25031185150146484, "rewards/rejected": -0.4031461477279663, "step": 6431 }, { "epoch": 17.609856262833677, "grad_norm": 3.9565703868865967, "learning_rate": 1.189041095890411e-07, "log_odds_chosen": 3.4753501415252686, "log_odds_ratio": -0.1479869931936264, "logits/chosen": 1.208261251449585, "logits/rejected": 1.2718571424484253, "logps/chosen": -1.8970789909362793, "logps/rejected": -5.190866470336914, "loss": 0.4826, "nll_loss": 0.4678438603878021, "rewards/accuracies": 1.0, "rewards/chosen": -0.1897079050540924, "rewards/margins": 0.32937875390052795, "rewards/rejected": -0.5190865993499756, "step": 6432 }, { "epoch": 17.61259411362081, "grad_norm": 3.9019389152526855, "learning_rate": 1.1876712328767123e-07, "log_odds_chosen": 3.811748504638672, "log_odds_ratio": -0.13546741008758545, "logits/chosen": 1.0665138959884644, "logits/rejected": 1.097917079925537, "logps/chosen": -2.0305848121643066, "logps/rejected": -5.718442916870117, "loss": 0.5673, "nll_loss": 0.553706705570221, "rewards/accuracies": 1.0, "rewards/chosen": -0.20305848121643066, "rewards/margins": 0.3687858283519745, "rewards/rejected": -0.5718443393707275, "step": 6433 }, { "epoch": 17.61533196440794, "grad_norm": 3.827183723449707, "learning_rate": 1.1863013698630136e-07, "log_odds_chosen": 2.1735947132110596, "log_odds_ratio": -0.18408271670341492, "logits/chosen": 1.1573371887207031, "logits/rejected": 1.100013017654419, "logps/chosen": -1.5067856311798096, "logps/rejected": -3.363523006439209, "loss": 0.5007, "nll_loss": 0.48232942819595337, "rewards/accuracies": 1.0, "rewards/chosen": -0.1506785750389099, "rewards/margins": 0.18567371368408203, "rewards/rejected": -0.33635228872299194, "step": 6434 }, { "epoch": 17.61806981519507, "grad_norm": 3.8345510959625244, "learning_rate": 1.184931506849315e-07, "log_odds_chosen": 3.9033119678497314, "log_odds_ratio": -0.16472050547599792, "logits/chosen": 1.2408033609390259, "logits/rejected": 1.2914499044418335, "logps/chosen": -1.967125654220581, "logps/rejected": -5.68170166015625, "loss": 0.7268, "nll_loss": 0.710376501083374, "rewards/accuracies": 1.0, "rewards/chosen": -0.19671256840229034, "rewards/margins": 0.37145760655403137, "rewards/rejected": -0.5681701898574829, "step": 6435 }, { "epoch": 17.620807665982205, "grad_norm": 3.9225082397460938, "learning_rate": 1.1835616438356163e-07, "log_odds_chosen": 1.9102697372436523, "log_odds_ratio": -0.25493645668029785, "logits/chosen": 1.0967543125152588, "logits/rejected": 1.076030969619751, "logps/chosen": -1.6075019836425781, "logps/rejected": -3.332935333251953, "loss": 0.4939, "nll_loss": 0.46845418214797974, "rewards/accuracies": 0.875, "rewards/chosen": -0.16075018048286438, "rewards/margins": 0.17254337668418884, "rewards/rejected": -0.3332935571670532, "step": 6436 }, { "epoch": 17.623545516769337, "grad_norm": 4.171420574188232, "learning_rate": 1.1821917808219177e-07, "log_odds_chosen": 2.9530653953552246, "log_odds_ratio": -0.22545845806598663, "logits/chosen": 0.987865149974823, "logits/rejected": 0.9946181178092957, "logps/chosen": -2.245351791381836, "logps/rejected": -5.015536785125732, "loss": 0.6247, "nll_loss": 0.6021814346313477, "rewards/accuracies": 1.0, "rewards/chosen": -0.22453518211841583, "rewards/margins": 0.2770185172557831, "rewards/rejected": -0.5015537142753601, "step": 6437 }, { "epoch": 17.626283367556468, "grad_norm": 3.730733633041382, "learning_rate": 1.1808219178082192e-07, "log_odds_chosen": 2.133314609527588, "log_odds_ratio": -0.23541469871997833, "logits/chosen": 1.2414073944091797, "logits/rejected": 1.2523603439331055, "logps/chosen": -1.6890705823898315, "logps/rejected": -3.651268482208252, "loss": 0.4851, "nll_loss": 0.46154338121414185, "rewards/accuracies": 1.0, "rewards/chosen": -0.1689070612192154, "rewards/margins": 0.196219801902771, "rewards/rejected": -0.3651268482208252, "step": 6438 }, { "epoch": 17.6290212183436, "grad_norm": 8.984489440917969, "learning_rate": 1.1794520547945206e-07, "log_odds_chosen": 2.0303609371185303, "log_odds_ratio": -0.7389398813247681, "logits/chosen": 1.000688910484314, "logits/rejected": 1.1365923881530762, "logps/chosen": -3.1731650829315186, "logps/rejected": -5.015429496765137, "loss": 0.6723, "nll_loss": 0.5984156131744385, "rewards/accuracies": 0.75, "rewards/chosen": -0.31731653213500977, "rewards/margins": 0.18422642350196838, "rewards/rejected": -0.5015429258346558, "step": 6439 }, { "epoch": 17.631759069130734, "grad_norm": 4.5014777183532715, "learning_rate": 1.1780821917808218e-07, "log_odds_chosen": 3.0998928546905518, "log_odds_ratio": -0.23126307129859924, "logits/chosen": 0.8493586182594299, "logits/rejected": 0.8150084614753723, "logps/chosen": -2.5977745056152344, "logps/rejected": -5.534125804901123, "loss": 0.5351, "nll_loss": 0.5119912624359131, "rewards/accuracies": 0.875, "rewards/chosen": -0.2597774565219879, "rewards/margins": 0.2936351001262665, "rewards/rejected": -0.5534126162528992, "step": 6440 }, { "epoch": 17.634496919917865, "grad_norm": 3.7988405227661133, "learning_rate": 1.1767123287671232e-07, "log_odds_chosen": 2.925682544708252, "log_odds_ratio": -0.18850691616535187, "logits/chosen": 0.8376128077507019, "logits/rejected": 0.8238862156867981, "logps/chosen": -2.0460495948791504, "logps/rejected": -4.842023849487305, "loss": 0.5142, "nll_loss": 0.4953542947769165, "rewards/accuracies": 1.0, "rewards/chosen": -0.20460495352745056, "rewards/margins": 0.2795974314212799, "rewards/rejected": -0.48420238494873047, "step": 6441 }, { "epoch": 17.637234770704996, "grad_norm": 4.3837056159973145, "learning_rate": 1.1753424657534246e-07, "log_odds_chosen": 2.9241137504577637, "log_odds_ratio": -0.2368427813053131, "logits/chosen": 1.1506340503692627, "logits/rejected": 1.2053840160369873, "logps/chosen": -1.9629836082458496, "logps/rejected": -4.690064430236816, "loss": 0.5623, "nll_loss": 0.5385768413543701, "rewards/accuracies": 1.0, "rewards/chosen": -0.19629836082458496, "rewards/margins": 0.27270808815956116, "rewards/rejected": -0.4690064489841461, "step": 6442 }, { "epoch": 17.639972621492127, "grad_norm": 5.667377471923828, "learning_rate": 1.1739726027397259e-07, "log_odds_chosen": 2.4655585289001465, "log_odds_ratio": -0.38343751430511475, "logits/chosen": 1.0283483266830444, "logits/rejected": 1.1055028438568115, "logps/chosen": -2.2932493686676025, "logps/rejected": -4.574119567871094, "loss": 0.562, "nll_loss": 0.5236859321594238, "rewards/accuracies": 0.875, "rewards/chosen": -0.22932493686676025, "rewards/margins": 0.22808699309825897, "rewards/rejected": -0.4574119448661804, "step": 6443 }, { "epoch": 17.642710472279262, "grad_norm": 3.962702512741089, "learning_rate": 1.1726027397260274e-07, "log_odds_chosen": 3.1793577671051025, "log_odds_ratio": -0.20942413806915283, "logits/chosen": 0.7858189940452576, "logits/rejected": 0.7257786989212036, "logps/chosen": -1.5159201622009277, "logps/rejected": -4.471437931060791, "loss": 0.4589, "nll_loss": 0.4379153847694397, "rewards/accuracies": 1.0, "rewards/chosen": -0.15159201622009277, "rewards/margins": 0.2955518066883087, "rewards/rejected": -0.4471437931060791, "step": 6444 }, { "epoch": 17.645448323066393, "grad_norm": 4.074045658111572, "learning_rate": 1.1712328767123288e-07, "log_odds_chosen": 2.977226972579956, "log_odds_ratio": -0.1657978892326355, "logits/chosen": 0.9413710832595825, "logits/rejected": 0.9488972425460815, "logps/chosen": -1.7026078701019287, "logps/rejected": -4.443038463592529, "loss": 0.5257, "nll_loss": 0.5091380476951599, "rewards/accuracies": 1.0, "rewards/chosen": -0.17026078701019287, "rewards/margins": 0.27404308319091797, "rewards/rejected": -0.44430387020111084, "step": 6445 }, { "epoch": 17.648186173853524, "grad_norm": 9.920320510864258, "learning_rate": 1.1698630136986302e-07, "log_odds_chosen": 1.3471457958221436, "log_odds_ratio": -0.7603244185447693, "logits/chosen": 1.1442559957504272, "logits/rejected": 1.143253207206726, "logps/chosen": -2.6044416427612305, "logps/rejected": -3.815101146697998, "loss": 0.6181, "nll_loss": 0.54206782579422, "rewards/accuracies": 0.75, "rewards/chosen": -0.26044416427612305, "rewards/margins": 0.12106596678495407, "rewards/rejected": -0.38151007890701294, "step": 6446 }, { "epoch": 17.650924024640656, "grad_norm": 3.8516552448272705, "learning_rate": 1.1684931506849314e-07, "log_odds_chosen": 3.712149143218994, "log_odds_ratio": -0.21972651779651642, "logits/chosen": 0.7833691835403442, "logits/rejected": 0.7788863182067871, "logps/chosen": -1.999851107597351, "logps/rejected": -5.550516128540039, "loss": 0.5488, "nll_loss": 0.5268605947494507, "rewards/accuracies": 1.0, "rewards/chosen": -0.19998511672019958, "rewards/margins": 0.3550665080547333, "rewards/rejected": -0.5550516247749329, "step": 6447 }, { "epoch": 17.65366187542779, "grad_norm": 3.1821298599243164, "learning_rate": 1.1671232876712328e-07, "log_odds_chosen": 3.2132632732391357, "log_odds_ratio": -0.15562847256660461, "logits/chosen": 0.9606966972351074, "logits/rejected": 0.9584157466888428, "logps/chosen": -1.5553715229034424, "logps/rejected": -4.544435977935791, "loss": 0.4676, "nll_loss": 0.4520705044269562, "rewards/accuracies": 1.0, "rewards/chosen": -0.15553715825080872, "rewards/margins": 0.29890644550323486, "rewards/rejected": -0.4544436037540436, "step": 6448 }, { "epoch": 17.65639972621492, "grad_norm": 7.10005521774292, "learning_rate": 1.1657534246575342e-07, "log_odds_chosen": 1.1174250841140747, "log_odds_ratio": -0.4788739085197449, "logits/chosen": 1.0744061470031738, "logits/rejected": 0.9936478734016418, "logps/chosen": -2.509838342666626, "logps/rejected": -3.51438570022583, "loss": 0.4913, "nll_loss": 0.44345808029174805, "rewards/accuracies": 0.875, "rewards/chosen": -0.2509838342666626, "rewards/margins": 0.10045471787452698, "rewards/rejected": -0.3514385521411896, "step": 6449 }, { "epoch": 17.659137577002053, "grad_norm": 3.5388777256011963, "learning_rate": 1.1643835616438355e-07, "log_odds_chosen": 3.4979348182678223, "log_odds_ratio": -0.1478625237941742, "logits/chosen": 1.3402936458587646, "logits/rejected": 1.402801513671875, "logps/chosen": -1.996779203414917, "logps/rejected": -5.35540771484375, "loss": 0.5482, "nll_loss": 0.5334317684173584, "rewards/accuracies": 1.0, "rewards/chosen": -0.19967791438102722, "rewards/margins": 0.33586281538009644, "rewards/rejected": -0.5355408191680908, "step": 6450 }, { "epoch": 17.661875427789184, "grad_norm": 4.075673580169678, "learning_rate": 1.163013698630137e-07, "log_odds_chosen": 2.578423500061035, "log_odds_ratio": -0.19741526246070862, "logits/chosen": 0.9534366130828857, "logits/rejected": 0.9989303350448608, "logps/chosen": -1.995596170425415, "logps/rejected": -4.440369606018066, "loss": 0.5733, "nll_loss": 0.5535117983818054, "rewards/accuracies": 1.0, "rewards/chosen": -0.19955961406230927, "rewards/margins": 0.24447733163833618, "rewards/rejected": -0.44403693079948425, "step": 6451 }, { "epoch": 17.66461327857632, "grad_norm": 16.839780807495117, "learning_rate": 1.1616438356164384e-07, "log_odds_chosen": 4.287539482116699, "log_odds_ratio": -0.32520046830177307, "logits/chosen": 1.2566028833389282, "logits/rejected": 1.3059070110321045, "logps/chosen": -2.356729030609131, "logps/rejected": -6.505078315734863, "loss": 0.7313, "nll_loss": 0.6988298296928406, "rewards/accuracies": 0.875, "rewards/chosen": -0.23567292094230652, "rewards/margins": 0.41483497619628906, "rewards/rejected": -0.6505078673362732, "step": 6452 }, { "epoch": 17.66735112936345, "grad_norm": 7.409850120544434, "learning_rate": 1.1602739726027396e-07, "log_odds_chosen": 1.5270209312438965, "log_odds_ratio": -0.459993451833725, "logits/chosen": 1.0856705904006958, "logits/rejected": 1.0707595348358154, "logps/chosen": -2.589357614517212, "logps/rejected": -3.9572386741638184, "loss": 0.5237, "nll_loss": 0.47768908739089966, "rewards/accuracies": 0.875, "rewards/chosen": -0.2589357793331146, "rewards/margins": 0.13678810000419617, "rewards/rejected": -0.3957238793373108, "step": 6453 }, { "epoch": 17.67008898015058, "grad_norm": 3.6225051879882812, "learning_rate": 1.158904109589041e-07, "log_odds_chosen": 4.537496089935303, "log_odds_ratio": -0.05433330312371254, "logits/chosen": 1.1130931377410889, "logits/rejected": 1.1299569606781006, "logps/chosen": -1.8070694208145142, "logps/rejected": -6.12086296081543, "loss": 0.5857, "nll_loss": 0.5802567601203918, "rewards/accuracies": 1.0, "rewards/chosen": -0.1807069480419159, "rewards/margins": 0.4313793182373047, "rewards/rejected": -0.612086296081543, "step": 6454 }, { "epoch": 17.672826830937716, "grad_norm": 3.5536932945251465, "learning_rate": 1.1575342465753424e-07, "log_odds_chosen": 4.474071502685547, "log_odds_ratio": -0.18414324522018433, "logits/chosen": 1.3380649089813232, "logits/rejected": 1.3574224710464478, "logps/chosen": -1.8829765319824219, "logps/rejected": -6.204547882080078, "loss": 0.5823, "nll_loss": 0.563856840133667, "rewards/accuracies": 1.0, "rewards/chosen": -0.18829765915870667, "rewards/margins": 0.43215715885162354, "rewards/rejected": -0.6204547882080078, "step": 6455 }, { "epoch": 17.675564681724847, "grad_norm": 3.839094400405884, "learning_rate": 1.1561643835616437e-07, "log_odds_chosen": 3.9971117973327637, "log_odds_ratio": -0.13022932410240173, "logits/chosen": 0.9569324851036072, "logits/rejected": 0.9751958847045898, "logps/chosen": -2.0304818153381348, "logps/rejected": -5.890142440795898, "loss": 0.6285, "nll_loss": 0.6154969334602356, "rewards/accuracies": 0.875, "rewards/chosen": -0.20304818451404572, "rewards/margins": 0.38596606254577637, "rewards/rejected": -0.5890142917633057, "step": 6456 }, { "epoch": 17.678302532511978, "grad_norm": 3.158240795135498, "learning_rate": 1.1547945205479452e-07, "log_odds_chosen": 3.5339760780334473, "log_odds_ratio": -0.17622682452201843, "logits/chosen": 1.1389901638031006, "logits/rejected": 1.1728098392486572, "logps/chosen": -1.5878262519836426, "logps/rejected": -4.905841827392578, "loss": 0.5144, "nll_loss": 0.4967818260192871, "rewards/accuracies": 0.875, "rewards/chosen": -0.15878263115882874, "rewards/margins": 0.3318015933036804, "rewards/rejected": -0.49058419466018677, "step": 6457 }, { "epoch": 17.68104038329911, "grad_norm": 3.7185263633728027, "learning_rate": 1.1534246575342466e-07, "log_odds_chosen": 3.15109920501709, "log_odds_ratio": -0.11055994033813477, "logits/chosen": 1.2967252731323242, "logits/rejected": 1.2402302026748657, "logps/chosen": -1.9226270914077759, "logps/rejected": -4.835298538208008, "loss": 0.4951, "nll_loss": 0.4839985966682434, "rewards/accuracies": 1.0, "rewards/chosen": -0.1922627091407776, "rewards/margins": 0.29126712679862976, "rewards/rejected": -0.48352980613708496, "step": 6458 }, { "epoch": 17.683778234086244, "grad_norm": 9.63099193572998, "learning_rate": 1.152054794520548e-07, "log_odds_chosen": 4.03657865524292, "log_odds_ratio": -0.19623181223869324, "logits/chosen": 1.0325706005096436, "logits/rejected": 1.0105853080749512, "logps/chosen": -2.425307512283325, "logps/rejected": -6.344456672668457, "loss": 0.6572, "nll_loss": 0.6376023292541504, "rewards/accuracies": 0.875, "rewards/chosen": -0.24253076314926147, "rewards/margins": 0.39191490411758423, "rewards/rejected": -0.6344456672668457, "step": 6459 }, { "epoch": 17.686516084873375, "grad_norm": 6.244695663452148, "learning_rate": 1.1506849315068492e-07, "log_odds_chosen": 1.2645206451416016, "log_odds_ratio": -0.534331738948822, "logits/chosen": 1.1847504377365112, "logits/rejected": 1.1416434049606323, "logps/chosen": -2.397195339202881, "logps/rejected": -3.606353998184204, "loss": 0.6608, "nll_loss": 0.6073499321937561, "rewards/accuracies": 0.625, "rewards/chosen": -0.23971952497959137, "rewards/margins": 0.12091588973999023, "rewards/rejected": -0.3606353998184204, "step": 6460 }, { "epoch": 17.689253935660506, "grad_norm": 7.651116847991943, "learning_rate": 1.1493150684931506e-07, "log_odds_chosen": 3.5333056449890137, "log_odds_ratio": -0.15995505452156067, "logits/chosen": 1.2848988771438599, "logits/rejected": 1.3017828464508057, "logps/chosen": -2.2926902770996094, "logps/rejected": -5.574113368988037, "loss": 0.6052, "nll_loss": 0.5891661643981934, "rewards/accuracies": 1.0, "rewards/chosen": -0.22926901280879974, "rewards/margins": 0.32814234495162964, "rewards/rejected": -0.5574113130569458, "step": 6461 }, { "epoch": 17.691991786447637, "grad_norm": 2.9547269344329834, "learning_rate": 1.147945205479452e-07, "log_odds_chosen": 3.328897714614868, "log_odds_ratio": -0.10037411004304886, "logits/chosen": 1.28633713722229, "logits/rejected": 1.2789884805679321, "logps/chosen": -1.8132609128952026, "logps/rejected": -4.884083271026611, "loss": 0.4498, "nll_loss": 0.4397634267807007, "rewards/accuracies": 1.0, "rewards/chosen": -0.18132609128952026, "rewards/margins": 0.30708223581314087, "rewards/rejected": -0.48840832710266113, "step": 6462 }, { "epoch": 17.694729637234772, "grad_norm": 3.5575084686279297, "learning_rate": 1.1465753424657534e-07, "log_odds_chosen": 4.6720476150512695, "log_odds_ratio": -0.15277086198329926, "logits/chosen": 1.2403753995895386, "logits/rejected": 1.219298005104065, "logps/chosen": -1.9242340326309204, "logps/rejected": -6.434157371520996, "loss": 0.4577, "nll_loss": 0.4424280524253845, "rewards/accuracies": 1.0, "rewards/chosen": -0.19242340326309204, "rewards/margins": 0.4509923756122589, "rewards/rejected": -0.6434158086776733, "step": 6463 }, { "epoch": 17.697467488021903, "grad_norm": 4.041560649871826, "learning_rate": 1.1452054794520548e-07, "log_odds_chosen": 4.225237846374512, "log_odds_ratio": -0.23940779268741608, "logits/chosen": 0.9406577348709106, "logits/rejected": 1.013718605041504, "logps/chosen": -1.9550937414169312, "logps/rejected": -5.96750545501709, "loss": 0.6928, "nll_loss": 0.6688460111618042, "rewards/accuracies": 0.875, "rewards/chosen": -0.1955093890428543, "rewards/margins": 0.4012411832809448, "rewards/rejected": -0.5967506170272827, "step": 6464 }, { "epoch": 17.700205338809035, "grad_norm": 3.8169589042663574, "learning_rate": 1.1438356164383562e-07, "log_odds_chosen": 2.2742903232574463, "log_odds_ratio": -0.2434617131948471, "logits/chosen": 1.1880214214324951, "logits/rejected": 1.1257104873657227, "logps/chosen": -1.7611565589904785, "logps/rejected": -3.879012107849121, "loss": 0.6064, "nll_loss": 0.582017719745636, "rewards/accuracies": 0.75, "rewards/chosen": -0.17611566185951233, "rewards/margins": 0.21178556978702545, "rewards/rejected": -0.387901246547699, "step": 6465 }, { "epoch": 17.702943189596166, "grad_norm": 4.345974922180176, "learning_rate": 1.1424657534246575e-07, "log_odds_chosen": 2.4268946647644043, "log_odds_ratio": -0.2025989592075348, "logits/chosen": 1.1749604940414429, "logits/rejected": 1.163694143295288, "logps/chosen": -2.4087119102478027, "logps/rejected": -4.724975109100342, "loss": 0.5382, "nll_loss": 0.5179294347763062, "rewards/accuracies": 1.0, "rewards/chosen": -0.24087117612361908, "rewards/margins": 0.23162637650966644, "rewards/rejected": -0.4724975824356079, "step": 6466 }, { "epoch": 17.7056810403833, "grad_norm": 3.413038969039917, "learning_rate": 1.1410958904109588e-07, "log_odds_chosen": 4.238490581512451, "log_odds_ratio": -0.13516774773597717, "logits/chosen": 1.2489386796951294, "logits/rejected": 1.1742668151855469, "logps/chosen": -1.9616450071334839, "logps/rejected": -6.010594367980957, "loss": 0.477, "nll_loss": 0.46350374817848206, "rewards/accuracies": 1.0, "rewards/chosen": -0.19616450369358063, "rewards/margins": 0.40489494800567627, "rewards/rejected": -0.6010594367980957, "step": 6467 }, { "epoch": 17.70841889117043, "grad_norm": 4.347342491149902, "learning_rate": 1.1397260273972602e-07, "log_odds_chosen": 3.009531259536743, "log_odds_ratio": -0.12692083418369293, "logits/chosen": 0.891598641872406, "logits/rejected": 0.8473398685455322, "logps/chosen": -1.4636940956115723, "logps/rejected": -4.169212341308594, "loss": 0.475, "nll_loss": 0.46229416131973267, "rewards/accuracies": 1.0, "rewards/chosen": -0.14636942744255066, "rewards/margins": 0.27055180072784424, "rewards/rejected": -0.4169211983680725, "step": 6468 }, { "epoch": 17.711156741957563, "grad_norm": 3.786532163619995, "learning_rate": 1.1383561643835616e-07, "log_odds_chosen": 2.3363308906555176, "log_odds_ratio": -0.20217472314834595, "logits/chosen": 0.8056588768959045, "logits/rejected": 0.7909038662910461, "logps/chosen": -1.18777334690094, "logps/rejected": -3.1504592895507812, "loss": 0.6173, "nll_loss": 0.5970665216445923, "rewards/accuracies": 1.0, "rewards/chosen": -0.11877734214067459, "rewards/margins": 0.19626860320568085, "rewards/rejected": -0.31504595279693604, "step": 6469 }, { "epoch": 17.713894592744694, "grad_norm": 7.58353328704834, "learning_rate": 1.136986301369863e-07, "log_odds_chosen": 0.8223096132278442, "log_odds_ratio": -0.6169422268867493, "logits/chosen": 1.010305404663086, "logits/rejected": 0.9939741492271423, "logps/chosen": -3.0878098011016846, "logps/rejected": -3.7757391929626465, "loss": 0.5973, "nll_loss": 0.5356044769287109, "rewards/accuracies": 0.875, "rewards/chosen": -0.3087809681892395, "rewards/margins": 0.06879293918609619, "rewards/rejected": -0.3775739073753357, "step": 6470 }, { "epoch": 17.71663244353183, "grad_norm": 3.9585373401641846, "learning_rate": 1.1356164383561644e-07, "log_odds_chosen": 3.7290782928466797, "log_odds_ratio": -0.12280955910682678, "logits/chosen": 1.19607675075531, "logits/rejected": 1.1872788667678833, "logps/chosen": -1.407035231590271, "logps/rejected": -4.881376266479492, "loss": 0.4331, "nll_loss": 0.4208393096923828, "rewards/accuracies": 1.0, "rewards/chosen": -0.14070352911949158, "rewards/margins": 0.3474341034889221, "rewards/rejected": -0.4881376326084137, "step": 6471 }, { "epoch": 17.71937029431896, "grad_norm": 6.325323104858398, "learning_rate": 1.1342465753424657e-07, "log_odds_chosen": 2.370532989501953, "log_odds_ratio": -0.3245064318180084, "logits/chosen": 0.8721290826797485, "logits/rejected": 0.84416663646698, "logps/chosen": -2.0826339721679688, "logps/rejected": -4.302944660186768, "loss": 0.6576, "nll_loss": 0.625150203704834, "rewards/accuracies": 0.875, "rewards/chosen": -0.20826341211795807, "rewards/margins": 0.22203107178211212, "rewards/rejected": -0.4302944839000702, "step": 6472 }, { "epoch": 17.72210814510609, "grad_norm": 3.78706431388855, "learning_rate": 1.132876712328767e-07, "log_odds_chosen": 2.206899404525757, "log_odds_ratio": -0.15155869722366333, "logits/chosen": 1.231950044631958, "logits/rejected": 1.2326679229736328, "logps/chosen": -2.1592321395874023, "logps/rejected": -4.175695419311523, "loss": 0.4767, "nll_loss": 0.46158772706985474, "rewards/accuracies": 1.0, "rewards/chosen": -0.2159232199192047, "rewards/margins": 0.2016463577747345, "rewards/rejected": -0.4175695776939392, "step": 6473 }, { "epoch": 17.724845995893222, "grad_norm": 4.7345380783081055, "learning_rate": 1.1315068493150684e-07, "log_odds_chosen": 2.2038793563842773, "log_odds_ratio": -0.1576179713010788, "logits/chosen": 1.0197534561157227, "logits/rejected": 1.0952622890472412, "logps/chosen": -2.6760365962982178, "logps/rejected": -4.787665367126465, "loss": 0.6338, "nll_loss": 0.6180349588394165, "rewards/accuracies": 1.0, "rewards/chosen": -0.26760366559028625, "rewards/margins": 0.21116286516189575, "rewards/rejected": -0.478766530752182, "step": 6474 }, { "epoch": 17.727583846680357, "grad_norm": 4.229880332946777, "learning_rate": 1.1301369863013698e-07, "log_odds_chosen": 2.52532958984375, "log_odds_ratio": -0.2639541029930115, "logits/chosen": 0.9305607676506042, "logits/rejected": 0.7521853446960449, "logps/chosen": -1.1616318225860596, "logps/rejected": -3.410627603530884, "loss": 0.5388, "nll_loss": 0.512373685836792, "rewards/accuracies": 1.0, "rewards/chosen": -0.11616319417953491, "rewards/margins": 0.22489959001541138, "rewards/rejected": -0.3410627543926239, "step": 6475 }, { "epoch": 17.730321697467488, "grad_norm": 10.112809181213379, "learning_rate": 1.1287671232876713e-07, "log_odds_chosen": 1.7330617904663086, "log_odds_ratio": -0.6726253628730774, "logits/chosen": 1.006156086921692, "logits/rejected": 0.9366223216056824, "logps/chosen": -2.250758171081543, "logps/rejected": -3.8382363319396973, "loss": 0.523, "nll_loss": 0.4557383060455322, "rewards/accuracies": 0.75, "rewards/chosen": -0.2250758409500122, "rewards/margins": 0.15874779224395752, "rewards/rejected": -0.3838236629962921, "step": 6476 }, { "epoch": 17.73305954825462, "grad_norm": 3.5207228660583496, "learning_rate": 1.1273972602739726e-07, "log_odds_chosen": 3.4627344608306885, "log_odds_ratio": -0.3064272403717041, "logits/chosen": 1.0245596170425415, "logits/rejected": 1.038860559463501, "logps/chosen": -2.2508444786071777, "logps/rejected": -5.52156400680542, "loss": 0.5851, "nll_loss": 0.5545015335083008, "rewards/accuracies": 0.875, "rewards/chosen": -0.22508443892002106, "rewards/margins": 0.3270719647407532, "rewards/rejected": -0.552156388759613, "step": 6477 }, { "epoch": 17.73579739904175, "grad_norm": 3.896688461303711, "learning_rate": 1.126027397260274e-07, "log_odds_chosen": 2.7584950923919678, "log_odds_ratio": -0.14667364954948425, "logits/chosen": 1.2655048370361328, "logits/rejected": 1.3102778196334839, "logps/chosen": -2.07419753074646, "logps/rejected": -4.7088470458984375, "loss": 0.502, "nll_loss": 0.4873088002204895, "rewards/accuracies": 0.875, "rewards/chosen": -0.207419753074646, "rewards/margins": 0.26346492767333984, "rewards/rejected": -0.47088468074798584, "step": 6478 }, { "epoch": 17.738535249828885, "grad_norm": 3.6701221466064453, "learning_rate": 1.1246575342465753e-07, "log_odds_chosen": 4.383022308349609, "log_odds_ratio": -0.10218919813632965, "logits/chosen": 1.1636403799057007, "logits/rejected": 1.1843198537826538, "logps/chosen": -1.5659310817718506, "logps/rejected": -5.724584579467773, "loss": 0.5446, "nll_loss": 0.5344182848930359, "rewards/accuracies": 1.0, "rewards/chosen": -0.15659311413764954, "rewards/margins": 0.41586533188819885, "rewards/rejected": -0.5724584460258484, "step": 6479 }, { "epoch": 17.741273100616016, "grad_norm": 3.8787930011749268, "learning_rate": 1.1232876712328766e-07, "log_odds_chosen": 1.6266498565673828, "log_odds_ratio": -0.2865425646305084, "logits/chosen": 0.8501267433166504, "logits/rejected": 0.926422119140625, "logps/chosen": -1.7800368070602417, "logps/rejected": -3.239551305770874, "loss": 0.5266, "nll_loss": 0.49791058897972107, "rewards/accuracies": 0.875, "rewards/chosen": -0.1780036985874176, "rewards/margins": 0.14595143496990204, "rewards/rejected": -0.32395511865615845, "step": 6480 }, { "epoch": 17.744010951403148, "grad_norm": 3.954097270965576, "learning_rate": 1.121917808219178e-07, "log_odds_chosen": 3.908343553543091, "log_odds_ratio": -0.22912226617336273, "logits/chosen": 1.112518072128296, "logits/rejected": 1.093557596206665, "logps/chosen": -1.577895998954773, "logps/rejected": -5.305874824523926, "loss": 0.4791, "nll_loss": 0.4561600685119629, "rewards/accuracies": 1.0, "rewards/chosen": -0.15778958797454834, "rewards/margins": 0.3727979362010956, "rewards/rejected": -0.5305875539779663, "step": 6481 }, { "epoch": 17.746748802190282, "grad_norm": 2.960374116897583, "learning_rate": 1.1205479452054795e-07, "log_odds_chosen": 2.5436668395996094, "log_odds_ratio": -0.09780345857143402, "logits/chosen": 1.1450921297073364, "logits/rejected": 1.029937744140625, "logps/chosen": -1.3123129606246948, "logps/rejected": -3.545722007751465, "loss": 0.5071, "nll_loss": 0.49734288454055786, "rewards/accuracies": 1.0, "rewards/chosen": -0.13123130798339844, "rewards/margins": 0.22334089875221252, "rewards/rejected": -0.35457220673561096, "step": 6482 }, { "epoch": 17.749486652977414, "grad_norm": 3.706727981567383, "learning_rate": 1.1191780821917808e-07, "log_odds_chosen": 2.031217098236084, "log_odds_ratio": -0.2609773874282837, "logits/chosen": 1.1829564571380615, "logits/rejected": 1.1576642990112305, "logps/chosen": -1.4366612434387207, "logps/rejected": -3.2412924766540527, "loss": 0.4585, "nll_loss": 0.4324426054954529, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436661183834076, "rewards/margins": 0.18046313524246216, "rewards/rejected": -0.32412925362586975, "step": 6483 }, { "epoch": 17.752224503764545, "grad_norm": 3.3100650310516357, "learning_rate": 1.1178082191780821e-07, "log_odds_chosen": 1.440264105796814, "log_odds_ratio": -0.28811967372894287, "logits/chosen": 1.2086927890777588, "logits/rejected": 1.1194970607757568, "logps/chosen": -1.3738653659820557, "logps/rejected": -2.57671856880188, "loss": 0.4184, "nll_loss": 0.3895495533943176, "rewards/accuracies": 1.0, "rewards/chosen": -0.1373865306377411, "rewards/margins": 0.12028533220291138, "rewards/rejected": -0.25767186284065247, "step": 6484 }, { "epoch": 17.754962354551676, "grad_norm": 3.698301315307617, "learning_rate": 1.1164383561643835e-07, "log_odds_chosen": 3.742732286453247, "log_odds_ratio": -0.2026616483926773, "logits/chosen": 0.9348682165145874, "logits/rejected": 0.9377954602241516, "logps/chosen": -1.5498650074005127, "logps/rejected": -4.9908552169799805, "loss": 0.4501, "nll_loss": 0.4298470616340637, "rewards/accuracies": 1.0, "rewards/chosen": -0.15498650074005127, "rewards/margins": 0.3440990447998047, "rewards/rejected": -0.49908554553985596, "step": 6485 }, { "epoch": 17.75770020533881, "grad_norm": 3.8468215465545654, "learning_rate": 1.1150684931506849e-07, "log_odds_chosen": 3.271977186203003, "log_odds_ratio": -0.1661873310804367, "logits/chosen": 1.1491094827651978, "logits/rejected": 1.1257647275924683, "logps/chosen": -1.9260978698730469, "logps/rejected": -5.0110602378845215, "loss": 0.4483, "nll_loss": 0.4316697120666504, "rewards/accuracies": 1.0, "rewards/chosen": -0.1926097869873047, "rewards/margins": 0.30849623680114746, "rewards/rejected": -0.5011060237884521, "step": 6486 }, { "epoch": 17.760438056125942, "grad_norm": 4.573254585266113, "learning_rate": 1.1136986301369862e-07, "log_odds_chosen": 0.8345833420753479, "log_odds_ratio": -0.5488052368164062, "logits/chosen": 1.0282152891159058, "logits/rejected": 1.005787968635559, "logps/chosen": -2.2188291549682617, "logps/rejected": -2.994102954864502, "loss": 0.5724, "nll_loss": 0.5175474882125854, "rewards/accuracies": 0.75, "rewards/chosen": -0.22188293933868408, "rewards/margins": 0.07752737402915955, "rewards/rejected": -0.29941028356552124, "step": 6487 }, { "epoch": 17.763175906913073, "grad_norm": 4.492514610290527, "learning_rate": 1.1123287671232877e-07, "log_odds_chosen": 2.547532558441162, "log_odds_ratio": -0.37646645307540894, "logits/chosen": 0.9878362417221069, "logits/rejected": 1.0841147899627686, "logps/chosen": -1.7295029163360596, "logps/rejected": -4.029184818267822, "loss": 0.5023, "nll_loss": 0.46462976932525635, "rewards/accuracies": 0.875, "rewards/chosen": -0.17295029759407043, "rewards/margins": 0.22996821999549866, "rewards/rejected": -0.40291857719421387, "step": 6488 }, { "epoch": 17.765913757700204, "grad_norm": 3.879673957824707, "learning_rate": 1.1109589041095891e-07, "log_odds_chosen": 3.567995071411133, "log_odds_ratio": -0.15311819314956665, "logits/chosen": 0.9681580066680908, "logits/rejected": 1.0073471069335938, "logps/chosen": -1.3341423273086548, "logps/rejected": -4.527041912078857, "loss": 0.5447, "nll_loss": 0.5294146537780762, "rewards/accuracies": 1.0, "rewards/chosen": -0.13341423869132996, "rewards/margins": 0.3192899525165558, "rewards/rejected": -0.45270419120788574, "step": 6489 }, { "epoch": 17.76865160848734, "grad_norm": 3.6357178688049316, "learning_rate": 1.1095890410958903e-07, "log_odds_chosen": 3.315843105316162, "log_odds_ratio": -0.13342198729515076, "logits/chosen": 1.216171145439148, "logits/rejected": 1.178460717201233, "logps/chosen": -1.495613932609558, "logps/rejected": -4.529963493347168, "loss": 0.4758, "nll_loss": 0.46246784925460815, "rewards/accuracies": 1.0, "rewards/chosen": -0.14956139028072357, "rewards/margins": 0.30343493819236755, "rewards/rejected": -0.4529963433742523, "step": 6490 }, { "epoch": 17.77138945927447, "grad_norm": 4.511664390563965, "learning_rate": 1.1082191780821917e-07, "log_odds_chosen": 3.950098991394043, "log_odds_ratio": -0.16926856338977814, "logits/chosen": 1.0516945123672485, "logits/rejected": 1.0708521604537964, "logps/chosen": -2.5151429176330566, "logps/rejected": -6.283492088317871, "loss": 0.6802, "nll_loss": 0.663298487663269, "rewards/accuracies": 1.0, "rewards/chosen": -0.2515143156051636, "rewards/margins": 0.376834899187088, "rewards/rejected": -0.628349244594574, "step": 6491 }, { "epoch": 17.7741273100616, "grad_norm": 3.558014154434204, "learning_rate": 1.1068493150684931e-07, "log_odds_chosen": 4.28016471862793, "log_odds_ratio": -0.11432629078626633, "logits/chosen": 1.089056134223938, "logits/rejected": 1.0418293476104736, "logps/chosen": -1.5724570751190186, "logps/rejected": -5.623613357543945, "loss": 0.5352, "nll_loss": 0.523807168006897, "rewards/accuracies": 1.0, "rewards/chosen": -0.1572457104921341, "rewards/margins": 0.40511566400527954, "rewards/rejected": -0.5623613595962524, "step": 6492 }, { "epoch": 17.776865160848732, "grad_norm": 4.591450214385986, "learning_rate": 1.1054794520547944e-07, "log_odds_chosen": 3.9930882453918457, "log_odds_ratio": -0.18162299692630768, "logits/chosen": 1.1739552021026611, "logits/rejected": 1.1566065549850464, "logps/chosen": -2.2663440704345703, "logps/rejected": -6.030550003051758, "loss": 0.5726, "nll_loss": 0.5544700622558594, "rewards/accuracies": 1.0, "rewards/chosen": -0.2266344130039215, "rewards/margins": 0.3764205574989319, "rewards/rejected": -0.6030550003051758, "step": 6493 }, { "epoch": 17.779603011635867, "grad_norm": 4.215994358062744, "learning_rate": 1.1041095890410958e-07, "log_odds_chosen": 1.9291332960128784, "log_odds_ratio": -0.24329233169555664, "logits/chosen": 0.9381374716758728, "logits/rejected": 0.9548315405845642, "logps/chosen": -1.6153032779693604, "logps/rejected": -3.3853940963745117, "loss": 0.616, "nll_loss": 0.5916880965232849, "rewards/accuracies": 1.0, "rewards/chosen": -0.16153034567832947, "rewards/margins": 0.17700907588005066, "rewards/rejected": -0.33853939175605774, "step": 6494 }, { "epoch": 17.782340862423, "grad_norm": 4.792662143707275, "learning_rate": 1.1027397260273973e-07, "log_odds_chosen": 2.22355318069458, "log_odds_ratio": -0.18133321404457092, "logits/chosen": 0.8786888122558594, "logits/rejected": 0.7769322395324707, "logps/chosen": -1.8638603687286377, "logps/rejected": -3.9110589027404785, "loss": 0.4772, "nll_loss": 0.4590436816215515, "rewards/accuracies": 1.0, "rewards/chosen": -0.18638603389263153, "rewards/margins": 0.20471985638141632, "rewards/rejected": -0.39110589027404785, "step": 6495 }, { "epoch": 17.78507871321013, "grad_norm": 4.5904860496521, "learning_rate": 1.1013698630136987e-07, "log_odds_chosen": 1.2615751028060913, "log_odds_ratio": -0.32296091318130493, "logits/chosen": 0.9410662055015564, "logits/rejected": 0.9751875400543213, "logps/chosen": -2.0900278091430664, "logps/rejected": -3.258023262023926, "loss": 0.6296, "nll_loss": 0.5973435044288635, "rewards/accuracies": 0.875, "rewards/chosen": -0.2090027928352356, "rewards/margins": 0.11679953336715698, "rewards/rejected": -0.3258023262023926, "step": 6496 }, { "epoch": 17.78781656399726, "grad_norm": 6.336334228515625, "learning_rate": 1.0999999999999999e-07, "log_odds_chosen": 1.9104584455490112, "log_odds_ratio": -0.22472801804542542, "logits/chosen": 0.8858176469802856, "logits/rejected": 0.7982907295227051, "logps/chosen": -2.113126754760742, "logps/rejected": -3.81953501701355, "loss": 0.4938, "nll_loss": 0.47131988406181335, "rewards/accuracies": 0.875, "rewards/chosen": -0.21131271123886108, "rewards/margins": 0.17064081132411957, "rewards/rejected": -0.38195350766181946, "step": 6497 }, { "epoch": 17.790554414784395, "grad_norm": 5.401786804199219, "learning_rate": 1.0986301369863013e-07, "log_odds_chosen": 1.9562875032424927, "log_odds_ratio": -0.4097639322280884, "logits/chosen": 0.9973815679550171, "logits/rejected": 0.982138454914093, "logps/chosen": -2.4913625717163086, "logps/rejected": -4.317775249481201, "loss": 0.575, "nll_loss": 0.5339887738227844, "rewards/accuracies": 0.75, "rewards/chosen": -0.24913626909255981, "rewards/margins": 0.18264128267765045, "rewards/rejected": -0.4317775368690491, "step": 6498 }, { "epoch": 17.793292265571527, "grad_norm": 4.007244110107422, "learning_rate": 1.0972602739726027e-07, "log_odds_chosen": 2.652289867401123, "log_odds_ratio": -0.14892075955867767, "logits/chosen": 1.1537774801254272, "logits/rejected": 1.1545116901397705, "logps/chosen": -2.32661771774292, "logps/rejected": -4.820870876312256, "loss": 0.6007, "nll_loss": 0.585839033126831, "rewards/accuracies": 1.0, "rewards/chosen": -0.23266176879405975, "rewards/margins": 0.24942536652088165, "rewards/rejected": -0.48208707571029663, "step": 6499 }, { "epoch": 17.796030116358658, "grad_norm": 4.975592613220215, "learning_rate": 1.095890410958904e-07, "log_odds_chosen": 4.244482040405273, "log_odds_ratio": -0.1560516357421875, "logits/chosen": 0.9040383696556091, "logits/rejected": 0.8161020278930664, "logps/chosen": -1.572495937347412, "logps/rejected": -5.454173564910889, "loss": 0.58, "nll_loss": 0.5643609762191772, "rewards/accuracies": 1.0, "rewards/chosen": -0.1572495996952057, "rewards/margins": 0.38816776871681213, "rewards/rejected": -0.5454173684120178, "step": 6500 }, { "epoch": 17.79876796714579, "grad_norm": 3.9810144901275635, "learning_rate": 1.0945205479452055e-07, "log_odds_chosen": 3.0549685955047607, "log_odds_ratio": -0.24792948365211487, "logits/chosen": 1.2546889781951904, "logits/rejected": 1.2739040851593018, "logps/chosen": -1.3917295932769775, "logps/rejected": -4.242588520050049, "loss": 0.432, "nll_loss": 0.40721774101257324, "rewards/accuracies": 1.0, "rewards/chosen": -0.1391729712486267, "rewards/margins": 0.28508591651916504, "rewards/rejected": -0.42425891757011414, "step": 6501 }, { "epoch": 17.801505817932924, "grad_norm": 4.109470844268799, "learning_rate": 1.0931506849315069e-07, "log_odds_chosen": 2.9698705673217773, "log_odds_ratio": -0.20215155184268951, "logits/chosen": 1.1718683242797852, "logits/rejected": 1.232003927230835, "logps/chosen": -1.9717462062835693, "logps/rejected": -4.743841171264648, "loss": 0.549, "nll_loss": 0.5287613272666931, "rewards/accuracies": 1.0, "rewards/chosen": -0.19717462360858917, "rewards/margins": 0.2772095203399658, "rewards/rejected": -0.4743841290473938, "step": 6502 }, { "epoch": 17.804243668720055, "grad_norm": 3.6358234882354736, "learning_rate": 1.0917808219178081e-07, "log_odds_chosen": 2.1665291786193848, "log_odds_ratio": -0.18117506802082062, "logits/chosen": 1.2436175346374512, "logits/rejected": 1.2148606777191162, "logps/chosen": -1.8330152034759521, "logps/rejected": -3.7081234455108643, "loss": 0.451, "nll_loss": 0.43292298913002014, "rewards/accuracies": 1.0, "rewards/chosen": -0.18330152332782745, "rewards/margins": 0.18751080334186554, "rewards/rejected": -0.3708122968673706, "step": 6503 }, { "epoch": 17.806981519507186, "grad_norm": 3.485499620437622, "learning_rate": 1.0904109589041095e-07, "log_odds_chosen": 4.466350078582764, "log_odds_ratio": -0.09250243753194809, "logits/chosen": 1.0207425355911255, "logits/rejected": 1.0029217004776, "logps/chosen": -1.945208191871643, "logps/rejected": -6.260581016540527, "loss": 0.558, "nll_loss": 0.5487979054450989, "rewards/accuracies": 1.0, "rewards/chosen": -0.19452083110809326, "rewards/margins": 0.43153730034828186, "rewards/rejected": -0.6260581016540527, "step": 6504 }, { "epoch": 17.809719370294317, "grad_norm": 8.093518257141113, "learning_rate": 1.0890410958904109e-07, "log_odds_chosen": 3.0530965328216553, "log_odds_ratio": -0.6621384024620056, "logits/chosen": 0.9571511745452881, "logits/rejected": 0.9848824739456177, "logps/chosen": -2.902263879776001, "logps/rejected": -5.820017337799072, "loss": 0.7926, "nll_loss": 0.7263550758361816, "rewards/accuracies": 0.875, "rewards/chosen": -0.29022639989852905, "rewards/margins": 0.29177528619766235, "rewards/rejected": -0.5820016860961914, "step": 6505 }, { "epoch": 17.812457221081452, "grad_norm": 6.597176551818848, "learning_rate": 1.0876712328767123e-07, "log_odds_chosen": 2.676755905151367, "log_odds_ratio": -0.11419399082660675, "logits/chosen": 1.1366353034973145, "logits/rejected": 0.9940861463546753, "logps/chosen": -1.7425554990768433, "logps/rejected": -4.193300724029541, "loss": 0.5166, "nll_loss": 0.505221426486969, "rewards/accuracies": 1.0, "rewards/chosen": -0.17425554990768433, "rewards/margins": 0.24507451057434082, "rewards/rejected": -0.41933006048202515, "step": 6506 }, { "epoch": 17.815195071868583, "grad_norm": 4.620912551879883, "learning_rate": 1.0863013698630137e-07, "log_odds_chosen": 3.5798535346984863, "log_odds_ratio": -0.252430260181427, "logits/chosen": 0.9371097087860107, "logits/rejected": 0.9257795810699463, "logps/chosen": -3.034618854522705, "logps/rejected": -6.565943241119385, "loss": 0.6652, "nll_loss": 0.6399481296539307, "rewards/accuracies": 0.875, "rewards/chosen": -0.3034619092941284, "rewards/margins": 0.353132426738739, "rewards/rejected": -0.6565943360328674, "step": 6507 }, { "epoch": 17.817932922655714, "grad_norm": 3.8994250297546387, "learning_rate": 1.084931506849315e-07, "log_odds_chosen": 2.8622779846191406, "log_odds_ratio": -0.13242603838443756, "logits/chosen": 1.2964897155761719, "logits/rejected": 1.3452346324920654, "logps/chosen": -1.979888677597046, "logps/rejected": -4.662581443786621, "loss": 0.5573, "nll_loss": 0.5441024303436279, "rewards/accuracies": 1.0, "rewards/chosen": -0.1979888528585434, "rewards/margins": 0.2682693302631378, "rewards/rejected": -0.4662581980228424, "step": 6508 }, { "epoch": 17.82067077344285, "grad_norm": 3.3713533878326416, "learning_rate": 1.0835616438356165e-07, "log_odds_chosen": 4.613069534301758, "log_odds_ratio": -0.11418082565069199, "logits/chosen": 1.2743167877197266, "logits/rejected": 1.296179175376892, "logps/chosen": -2.30263614654541, "logps/rejected": -6.806912422180176, "loss": 0.6125, "nll_loss": 0.6011149883270264, "rewards/accuracies": 1.0, "rewards/chosen": -0.2302636206150055, "rewards/margins": 0.45042771100997925, "rewards/rejected": -0.6806913018226624, "step": 6509 }, { "epoch": 17.82340862422998, "grad_norm": 3.164429187774658, "learning_rate": 1.0821917808219177e-07, "log_odds_chosen": 2.3202056884765625, "log_odds_ratio": -0.16387222707271576, "logits/chosen": 1.122531771659851, "logits/rejected": 1.1189545392990112, "logps/chosen": -1.5459997653961182, "logps/rejected": -3.6103885173797607, "loss": 0.45, "nll_loss": 0.4335991144180298, "rewards/accuracies": 1.0, "rewards/chosen": -0.15459997951984406, "rewards/margins": 0.2064388543367386, "rewards/rejected": -0.36103886365890503, "step": 6510 }, { "epoch": 17.82614647501711, "grad_norm": 3.2461278438568115, "learning_rate": 1.0808219178082191e-07, "log_odds_chosen": 3.0565216541290283, "log_odds_ratio": -0.1989302784204483, "logits/chosen": 1.0048165321350098, "logits/rejected": 1.0295658111572266, "logps/chosen": -1.1663408279418945, "logps/rejected": -3.867128849029541, "loss": 0.4942, "nll_loss": 0.47432368993759155, "rewards/accuracies": 1.0, "rewards/chosen": -0.11663408577442169, "rewards/margins": 0.2700788080692291, "rewards/rejected": -0.386712908744812, "step": 6511 }, { "epoch": 17.828884325804243, "grad_norm": 6.053582191467285, "learning_rate": 1.0794520547945205e-07, "log_odds_chosen": 0.841300368309021, "log_odds_ratio": -0.5529349446296692, "logits/chosen": 1.153425693511963, "logits/rejected": 1.0831272602081299, "logps/chosen": -2.2773964405059814, "logps/rejected": -2.9840784072875977, "loss": 0.6494, "nll_loss": 0.5940971374511719, "rewards/accuracies": 0.75, "rewards/chosen": -0.22773964703083038, "rewards/margins": 0.07066820561885834, "rewards/rejected": -0.2984078526496887, "step": 6512 }, { "epoch": 17.831622176591377, "grad_norm": 3.960420608520508, "learning_rate": 1.0780821917808219e-07, "log_odds_chosen": 4.9325666427612305, "log_odds_ratio": -0.08799225836992264, "logits/chosen": 1.0781326293945312, "logits/rejected": 1.081552267074585, "logps/chosen": -2.255044937133789, "logps/rejected": -7.015888690948486, "loss": 0.5618, "nll_loss": 0.553004264831543, "rewards/accuracies": 1.0, "rewards/chosen": -0.22550448775291443, "rewards/margins": 0.476084440946579, "rewards/rejected": -0.7015888690948486, "step": 6513 }, { "epoch": 17.83436002737851, "grad_norm": 5.298652172088623, "learning_rate": 1.0767123287671233e-07, "log_odds_chosen": 1.6137428283691406, "log_odds_ratio": -0.2885259687900543, "logits/chosen": 0.9227714538574219, "logits/rejected": 0.9293839931488037, "logps/chosen": -2.1598737239837646, "logps/rejected": -3.646559715270996, "loss": 0.4895, "nll_loss": 0.4606654942035675, "rewards/accuracies": 1.0, "rewards/chosen": -0.2159873992204666, "rewards/margins": 0.148668572306633, "rewards/rejected": -0.3646559715270996, "step": 6514 }, { "epoch": 17.83709787816564, "grad_norm": 5.141437530517578, "learning_rate": 1.0753424657534247e-07, "log_odds_chosen": 3.1806416511535645, "log_odds_ratio": -0.20412737131118774, "logits/chosen": 1.0327131748199463, "logits/rejected": 0.9737203121185303, "logps/chosen": -2.0671932697296143, "logps/rejected": -5.087460517883301, "loss": 0.5964, "nll_loss": 0.5760188102722168, "rewards/accuracies": 1.0, "rewards/chosen": -0.2067193239927292, "rewards/margins": 0.3020267188549042, "rewards/rejected": -0.5087460279464722, "step": 6515 }, { "epoch": 17.83983572895277, "grad_norm": 3.6826553344726562, "learning_rate": 1.073972602739726e-07, "log_odds_chosen": 4.803771018981934, "log_odds_ratio": -0.08760641515254974, "logits/chosen": 0.9585869312286377, "logits/rejected": 0.9542901515960693, "logps/chosen": -1.5475046634674072, "logps/rejected": -6.0654072761535645, "loss": 0.4709, "nll_loss": 0.46214091777801514, "rewards/accuracies": 1.0, "rewards/chosen": -0.15475046634674072, "rewards/margins": 0.4517902731895447, "rewards/rejected": -0.6065407395362854, "step": 6516 }, { "epoch": 17.842573579739906, "grad_norm": 4.082632541656494, "learning_rate": 1.0726027397260273e-07, "log_odds_chosen": 1.6363080739974976, "log_odds_ratio": -0.23680463433265686, "logits/chosen": 1.2020564079284668, "logits/rejected": 1.1415396928787231, "logps/chosen": -2.0538556575775146, "logps/rejected": -3.5407614707946777, "loss": 0.478, "nll_loss": 0.4543178677558899, "rewards/accuracies": 0.875, "rewards/chosen": -0.20538556575775146, "rewards/margins": 0.1486905962228775, "rewards/rejected": -0.3540761470794678, "step": 6517 }, { "epoch": 17.845311430527037, "grad_norm": 3.6901330947875977, "learning_rate": 1.0712328767123287e-07, "log_odds_chosen": 2.015486717224121, "log_odds_ratio": -0.24570472538471222, "logits/chosen": 1.2518911361694336, "logits/rejected": 1.2406628131866455, "logps/chosen": -1.5264548063278198, "logps/rejected": -3.2961065769195557, "loss": 0.4256, "nll_loss": 0.4010288715362549, "rewards/accuracies": 0.875, "rewards/chosen": -0.15264546871185303, "rewards/margins": 0.17696520686149597, "rewards/rejected": -0.3296107053756714, "step": 6518 }, { "epoch": 17.848049281314168, "grad_norm": 3.131629228591919, "learning_rate": 1.0698630136986301e-07, "log_odds_chosen": 2.519568681716919, "log_odds_ratio": -0.24295860528945923, "logits/chosen": 1.0530357360839844, "logits/rejected": 0.9659878611564636, "logps/chosen": -2.0292186737060547, "logps/rejected": -4.38956356048584, "loss": 0.4563, "nll_loss": 0.43196216225624084, "rewards/accuracies": 0.875, "rewards/chosen": -0.20292188227176666, "rewards/margins": 0.23603445291519165, "rewards/rejected": -0.4389563202857971, "step": 6519 }, { "epoch": 17.8507871321013, "grad_norm": 9.722145080566406, "learning_rate": 1.0684931506849315e-07, "log_odds_chosen": 2.6188182830810547, "log_odds_ratio": -0.16524875164031982, "logits/chosen": 1.2271416187286377, "logits/rejected": 1.1645456552505493, "logps/chosen": -1.8189270496368408, "logps/rejected": -4.238531112670898, "loss": 0.5547, "nll_loss": 0.5381642580032349, "rewards/accuracies": 1.0, "rewards/chosen": -0.18189270794391632, "rewards/margins": 0.24196040630340576, "rewards/rejected": -0.4238530993461609, "step": 6520 }, { "epoch": 17.853524982888434, "grad_norm": 4.040759086608887, "learning_rate": 1.0671232876712328e-07, "log_odds_chosen": 3.607646942138672, "log_odds_ratio": -0.14482589066028595, "logits/chosen": 1.0205150842666626, "logits/rejected": 1.110642910003662, "logps/chosen": -3.107862949371338, "logps/rejected": -6.480673313140869, "loss": 0.6165, "nll_loss": 0.601982057094574, "rewards/accuracies": 1.0, "rewards/chosen": -0.31078630685806274, "rewards/margins": 0.33728107810020447, "rewards/rejected": -0.6480673551559448, "step": 6521 }, { "epoch": 17.856262833675565, "grad_norm": 6.16895866394043, "learning_rate": 1.0657534246575342e-07, "log_odds_chosen": 2.524641990661621, "log_odds_ratio": -0.3688587546348572, "logits/chosen": 0.9863293170928955, "logits/rejected": 0.949404776096344, "logps/chosen": -2.8974671363830566, "logps/rejected": -5.113013744354248, "loss": 0.6146, "nll_loss": 0.5777526497840881, "rewards/accuracies": 0.75, "rewards/chosen": -0.2897467017173767, "rewards/margins": 0.221554696559906, "rewards/rejected": -0.5113013982772827, "step": 6522 }, { "epoch": 17.859000684462696, "grad_norm": 3.2313756942749023, "learning_rate": 1.0643835616438355e-07, "log_odds_chosen": 2.2903313636779785, "log_odds_ratio": -0.20249328017234802, "logits/chosen": 1.2163785696029663, "logits/rejected": 1.1986675262451172, "logps/chosen": -1.8130269050598145, "logps/rejected": -3.9503684043884277, "loss": 0.4911, "nll_loss": 0.4708390235900879, "rewards/accuracies": 1.0, "rewards/chosen": -0.18130269646644592, "rewards/margins": 0.21373416483402252, "rewards/rejected": -0.39503684639930725, "step": 6523 }, { "epoch": 17.861738535249827, "grad_norm": 3.4190292358398438, "learning_rate": 1.0630136986301369e-07, "log_odds_chosen": 3.8397135734558105, "log_odds_ratio": -0.08885696530342102, "logits/chosen": 1.1364444494247437, "logits/rejected": 1.0311493873596191, "logps/chosen": -1.6031181812286377, "logps/rejected": -5.06613826751709, "loss": 0.5432, "nll_loss": 0.5343301296234131, "rewards/accuracies": 1.0, "rewards/chosen": -0.16031181812286377, "rewards/margins": 0.34630197286605835, "rewards/rejected": -0.5066138505935669, "step": 6524 }, { "epoch": 17.864476386036962, "grad_norm": 5.480762004852295, "learning_rate": 1.0616438356164383e-07, "log_odds_chosen": 2.3934485912323, "log_odds_ratio": -0.2553578317165375, "logits/chosen": 1.0047755241394043, "logits/rejected": 0.8521711230278015, "logps/chosen": -1.7874627113342285, "logps/rejected": -3.949716329574585, "loss": 0.4344, "nll_loss": 0.4089055061340332, "rewards/accuracies": 1.0, "rewards/chosen": -0.1787462830543518, "rewards/margins": 0.21622535586357117, "rewards/rejected": -0.394971638917923, "step": 6525 }, { "epoch": 17.867214236824093, "grad_norm": 3.3130664825439453, "learning_rate": 1.0602739726027398e-07, "log_odds_chosen": 3.738717794418335, "log_odds_ratio": -0.18483716249465942, "logits/chosen": 1.3948519229888916, "logits/rejected": 1.3007516860961914, "logps/chosen": -2.0593817234039307, "logps/rejected": -5.607368469238281, "loss": 0.5821, "nll_loss": 0.5636570453643799, "rewards/accuracies": 1.0, "rewards/chosen": -0.2059381753206253, "rewards/margins": 0.35479870438575745, "rewards/rejected": -0.560736894607544, "step": 6526 }, { "epoch": 17.869952087611225, "grad_norm": 3.970172643661499, "learning_rate": 1.058904109589041e-07, "log_odds_chosen": 2.469146490097046, "log_odds_ratio": -0.25008338689804077, "logits/chosen": 0.9507578015327454, "logits/rejected": 0.8709261417388916, "logps/chosen": -1.3336899280548096, "logps/rejected": -3.520756959915161, "loss": 0.4331, "nll_loss": 0.4081331789493561, "rewards/accuracies": 1.0, "rewards/chosen": -0.13336899876594543, "rewards/margins": 0.21870669722557068, "rewards/rejected": -0.3520756959915161, "step": 6527 }, { "epoch": 17.872689938398356, "grad_norm": 3.2396950721740723, "learning_rate": 1.0575342465753424e-07, "log_odds_chosen": 4.708410263061523, "log_odds_ratio": -0.08356262743473053, "logits/chosen": 1.005576252937317, "logits/rejected": 1.043882966041565, "logps/chosen": -1.5106598138809204, "logps/rejected": -5.9151763916015625, "loss": 0.5453, "nll_loss": 0.5368994474411011, "rewards/accuracies": 1.0, "rewards/chosen": -0.15106597542762756, "rewards/margins": 0.44045165181159973, "rewards/rejected": -0.5915176868438721, "step": 6528 }, { "epoch": 17.87542778918549, "grad_norm": 4.238597869873047, "learning_rate": 1.0561643835616438e-07, "log_odds_chosen": 2.8823416233062744, "log_odds_ratio": -0.21712170541286469, "logits/chosen": 0.9916316270828247, "logits/rejected": 0.962146520614624, "logps/chosen": -1.9333338737487793, "logps/rejected": -4.682199478149414, "loss": 0.554, "nll_loss": 0.5323181748390198, "rewards/accuracies": 0.875, "rewards/chosen": -0.19333338737487793, "rewards/margins": 0.27488651871681213, "rewards/rejected": -0.46821993589401245, "step": 6529 }, { "epoch": 17.87816563997262, "grad_norm": 3.63043212890625, "learning_rate": 1.0547945205479451e-07, "log_odds_chosen": 2.520469903945923, "log_odds_ratio": -0.183061420917511, "logits/chosen": 0.9976228475570679, "logits/rejected": 0.8785400986671448, "logps/chosen": -1.4840123653411865, "logps/rejected": -3.7711658477783203, "loss": 0.4874, "nll_loss": 0.4690674841403961, "rewards/accuracies": 1.0, "rewards/chosen": -0.14840123057365417, "rewards/margins": 0.22871534526348114, "rewards/rejected": -0.3771165907382965, "step": 6530 }, { "epoch": 17.880903490759753, "grad_norm": 3.9022672176361084, "learning_rate": 1.0534246575342465e-07, "log_odds_chosen": 4.091456413269043, "log_odds_ratio": -0.13172608613967896, "logits/chosen": 1.1154829263687134, "logits/rejected": 1.1575461626052856, "logps/chosen": -2.0799031257629395, "logps/rejected": -5.960992813110352, "loss": 0.65, "nll_loss": 0.6368483304977417, "rewards/accuracies": 1.0, "rewards/chosen": -0.20799031853675842, "rewards/margins": 0.3881089687347412, "rewards/rejected": -0.5960992574691772, "step": 6531 }, { "epoch": 17.883641341546884, "grad_norm": 4.215837001800537, "learning_rate": 1.052054794520548e-07, "log_odds_chosen": 3.2733824253082275, "log_odds_ratio": -0.07287377864122391, "logits/chosen": 1.3062636852264404, "logits/rejected": 1.4060531854629517, "logps/chosen": -2.1977713108062744, "logps/rejected": -5.332428932189941, "loss": 0.6159, "nll_loss": 0.608566164970398, "rewards/accuracies": 1.0, "rewards/chosen": -0.21977713704109192, "rewards/margins": 0.31346577405929565, "rewards/rejected": -0.53324294090271, "step": 6532 }, { "epoch": 17.88637919233402, "grad_norm": 4.673511981964111, "learning_rate": 1.0506849315068492e-07, "log_odds_chosen": 6.050180435180664, "log_odds_ratio": -0.09120814502239227, "logits/chosen": 1.1742497682571411, "logits/rejected": 1.1923389434814453, "logps/chosen": -1.328822135925293, "logps/rejected": -7.019571304321289, "loss": 0.5559, "nll_loss": 0.5467634797096252, "rewards/accuracies": 1.0, "rewards/chosen": -0.13288220763206482, "rewards/margins": 0.5690749883651733, "rewards/rejected": -0.7019572257995605, "step": 6533 }, { "epoch": 17.88911704312115, "grad_norm": 4.560358047485352, "learning_rate": 1.0493150684931506e-07, "log_odds_chosen": 2.387882947921753, "log_odds_ratio": -0.37717482447624207, "logits/chosen": 1.189469337463379, "logits/rejected": 1.15015709400177, "logps/chosen": -2.465806245803833, "logps/rejected": -4.752466678619385, "loss": 0.5422, "nll_loss": 0.5044447183609009, "rewards/accuracies": 0.875, "rewards/chosen": -0.24658063054084778, "rewards/margins": 0.22866606712341309, "rewards/rejected": -0.47524669766426086, "step": 6534 }, { "epoch": 17.89185489390828, "grad_norm": 3.325620651245117, "learning_rate": 1.047945205479452e-07, "log_odds_chosen": 4.53034782409668, "log_odds_ratio": -0.06635679304599762, "logits/chosen": 1.1997556686401367, "logits/rejected": 1.215075135231018, "logps/chosen": -2.3361151218414307, "logps/rejected": -6.62688684463501, "loss": 0.5163, "nll_loss": 0.509670078754425, "rewards/accuracies": 1.0, "rewards/chosen": -0.23361152410507202, "rewards/margins": 0.4290771782398224, "rewards/rejected": -0.6626887321472168, "step": 6535 }, { "epoch": 17.894592744695416, "grad_norm": 3.9687983989715576, "learning_rate": 1.0465753424657534e-07, "log_odds_chosen": 1.402789831161499, "log_odds_ratio": -0.3182430863380432, "logits/chosen": 1.0926495790481567, "logits/rejected": 1.092327356338501, "logps/chosen": -1.574155569076538, "logps/rejected": -2.7777493000030518, "loss": 0.4569, "nll_loss": 0.42508581280708313, "rewards/accuracies": 0.875, "rewards/chosen": -0.15741556882858276, "rewards/margins": 0.12035935372114182, "rewards/rejected": -0.2777749300003052, "step": 6536 }, { "epoch": 17.897330595482547, "grad_norm": 6.2253737449646, "learning_rate": 1.0452054794520547e-07, "log_odds_chosen": 2.7369771003723145, "log_odds_ratio": -0.2719663679599762, "logits/chosen": 1.41129732131958, "logits/rejected": 1.5021653175354004, "logps/chosen": -2.4159059524536133, "logps/rejected": -4.9408488273620605, "loss": 0.553, "nll_loss": 0.5257642269134521, "rewards/accuracies": 0.875, "rewards/chosen": -0.24159061908721924, "rewards/margins": 0.25249430537223816, "rewards/rejected": -0.494084894657135, "step": 6537 }, { "epoch": 17.900068446269678, "grad_norm": 3.754943609237671, "learning_rate": 1.043835616438356e-07, "log_odds_chosen": 5.534003257751465, "log_odds_ratio": -0.19205372035503387, "logits/chosen": 1.0479680299758911, "logits/rejected": 1.0314393043518066, "logps/chosen": -1.8111201524734497, "logps/rejected": -7.1208176612854, "loss": 0.5296, "nll_loss": 0.5104033946990967, "rewards/accuracies": 0.875, "rewards/chosen": -0.18111202120780945, "rewards/margins": 0.5309697985649109, "rewards/rejected": -0.7120817303657532, "step": 6538 }, { "epoch": 17.90280629705681, "grad_norm": 4.4532318115234375, "learning_rate": 1.0424657534246576e-07, "log_odds_chosen": 1.5754929780960083, "log_odds_ratio": -0.44304516911506653, "logits/chosen": 1.1589021682739258, "logits/rejected": 1.1189191341400146, "logps/chosen": -1.7252554893493652, "logps/rejected": -3.0613861083984375, "loss": 0.4885, "nll_loss": 0.44418981671333313, "rewards/accuracies": 0.875, "rewards/chosen": -0.1725255399942398, "rewards/margins": 0.13361304998397827, "rewards/rejected": -0.3061386048793793, "step": 6539 }, { "epoch": 17.905544147843944, "grad_norm": 5.903564929962158, "learning_rate": 1.0410958904109588e-07, "log_odds_chosen": 2.9457342624664307, "log_odds_ratio": -0.12081381678581238, "logits/chosen": 1.3328359127044678, "logits/rejected": 1.2676329612731934, "logps/chosen": -2.099616289138794, "logps/rejected": -4.878515243530273, "loss": 0.6369, "nll_loss": 0.6247879266738892, "rewards/accuracies": 1.0, "rewards/chosen": -0.20996162295341492, "rewards/margins": 0.2778899371623993, "rewards/rejected": -0.4878515601158142, "step": 6540 }, { "epoch": 17.908281998631075, "grad_norm": 6.798361778259277, "learning_rate": 1.0397260273972602e-07, "log_odds_chosen": 3.3743767738342285, "log_odds_ratio": -0.4556131958961487, "logits/chosen": 1.247470736503601, "logits/rejected": 1.214674711227417, "logps/chosen": -1.6780664920806885, "logps/rejected": -4.9160966873168945, "loss": 0.5726, "nll_loss": 0.5270864963531494, "rewards/accuracies": 0.75, "rewards/chosen": -0.16780665516853333, "rewards/margins": 0.32380300760269165, "rewards/rejected": -0.4916096329689026, "step": 6541 }, { "epoch": 17.911019849418206, "grad_norm": 4.2471795082092285, "learning_rate": 1.0383561643835616e-07, "log_odds_chosen": 2.5580475330352783, "log_odds_ratio": -0.2623840570449829, "logits/chosen": 1.0078608989715576, "logits/rejected": 0.9892518520355225, "logps/chosen": -1.6635701656341553, "logps/rejected": -3.999140739440918, "loss": 0.6132, "nll_loss": 0.5869258642196655, "rewards/accuracies": 0.875, "rewards/chosen": -0.16635701060295105, "rewards/margins": 0.23355704545974731, "rewards/rejected": -0.39991408586502075, "step": 6542 }, { "epoch": 17.913757700205338, "grad_norm": 4.379092216491699, "learning_rate": 1.0369863013698629e-07, "log_odds_chosen": 2.0739827156066895, "log_odds_ratio": -0.23743975162506104, "logits/chosen": 1.050682783126831, "logits/rejected": 1.0656564235687256, "logps/chosen": -1.713763952255249, "logps/rejected": -3.568268299102783, "loss": 0.6174, "nll_loss": 0.593625545501709, "rewards/accuracies": 1.0, "rewards/chosen": -0.17137639224529266, "rewards/margins": 0.1854504644870758, "rewards/rejected": -0.35682687163352966, "step": 6543 }, { "epoch": 17.916495550992472, "grad_norm": 4.125190734863281, "learning_rate": 1.0356164383561643e-07, "log_odds_chosen": 1.245516300201416, "log_odds_ratio": -0.3368026912212372, "logits/chosen": 0.9407246708869934, "logits/rejected": 0.8841760158538818, "logps/chosen": -2.1038947105407715, "logps/rejected": -3.2065439224243164, "loss": 0.5412, "nll_loss": 0.5075244903564453, "rewards/accuracies": 0.875, "rewards/chosen": -0.21038946509361267, "rewards/margins": 0.11026494204998016, "rewards/rejected": -0.32065439224243164, "step": 6544 }, { "epoch": 17.919233401779604, "grad_norm": 3.773646116256714, "learning_rate": 1.0342465753424658e-07, "log_odds_chosen": 3.317885398864746, "log_odds_ratio": -0.08585134148597717, "logits/chosen": 1.1908023357391357, "logits/rejected": 1.208106517791748, "logps/chosen": -2.216276168823242, "logps/rejected": -5.381049633026123, "loss": 0.5967, "nll_loss": 0.5880982279777527, "rewards/accuracies": 1.0, "rewards/chosen": -0.2216276228427887, "rewards/margins": 0.31647732853889465, "rewards/rejected": -0.5381050109863281, "step": 6545 }, { "epoch": 17.921971252566735, "grad_norm": 7.050619602203369, "learning_rate": 1.0328767123287672e-07, "log_odds_chosen": 2.122971773147583, "log_odds_ratio": -0.8216590285301208, "logits/chosen": 0.9626284241676331, "logits/rejected": 0.9885199069976807, "logps/chosen": -2.325929880142212, "logps/rejected": -4.25618314743042, "loss": 0.5862, "nll_loss": 0.5040398240089417, "rewards/accuracies": 0.875, "rewards/chosen": -0.23259299993515015, "rewards/margins": 0.19302533566951752, "rewards/rejected": -0.42561835050582886, "step": 6546 }, { "epoch": 17.924709103353866, "grad_norm": 9.346893310546875, "learning_rate": 1.0315068493150684e-07, "log_odds_chosen": 1.917328119277954, "log_odds_ratio": -0.4701240062713623, "logits/chosen": 1.0838828086853027, "logits/rejected": 1.0598368644714355, "logps/chosen": -2.215566635131836, "logps/rejected": -3.9747684001922607, "loss": 0.5871, "nll_loss": 0.5400787591934204, "rewards/accuracies": 0.75, "rewards/chosen": -0.2215566635131836, "rewards/margins": 0.17592018842697144, "rewards/rejected": -0.39747685194015503, "step": 6547 }, { "epoch": 17.927446954141, "grad_norm": 3.585028886795044, "learning_rate": 1.0301369863013698e-07, "log_odds_chosen": 3.0030057430267334, "log_odds_ratio": -0.2647208869457245, "logits/chosen": 1.1193904876708984, "logits/rejected": 1.100411057472229, "logps/chosen": -1.5609339475631714, "logps/rejected": -4.345195293426514, "loss": 0.5102, "nll_loss": 0.48376578092575073, "rewards/accuracies": 0.875, "rewards/chosen": -0.15609338879585266, "rewards/margins": 0.2784261405467987, "rewards/rejected": -0.43451952934265137, "step": 6548 }, { "epoch": 17.930184804928132, "grad_norm": 7.302029609680176, "learning_rate": 1.0287671232876712e-07, "log_odds_chosen": 2.7380056381225586, "log_odds_ratio": -0.3742738962173462, "logits/chosen": 1.101626992225647, "logits/rejected": 1.1609183549880981, "logps/chosen": -2.2668731212615967, "logps/rejected": -4.825480937957764, "loss": 0.6513, "nll_loss": 0.6138973236083984, "rewards/accuracies": 0.875, "rewards/chosen": -0.22668734192848206, "rewards/margins": 0.25586074590682983, "rewards/rejected": -0.4825480878353119, "step": 6549 }, { "epoch": 17.932922655715263, "grad_norm": 3.66456937789917, "learning_rate": 1.0273972602739725e-07, "log_odds_chosen": 1.1614525318145752, "log_odds_ratio": -0.3678818941116333, "logits/chosen": 1.1256794929504395, "logits/rejected": 1.1225402355194092, "logps/chosen": -1.6303876638412476, "logps/rejected": -2.6561946868896484, "loss": 0.5074, "nll_loss": 0.4706055521965027, "rewards/accuracies": 0.875, "rewards/chosen": -0.16303876042366028, "rewards/margins": 0.10258070379495621, "rewards/rejected": -0.2656194865703583, "step": 6550 }, { "epoch": 17.935660506502394, "grad_norm": 3.4253592491149902, "learning_rate": 1.026027397260274e-07, "log_odds_chosen": 6.550507545471191, "log_odds_ratio": -0.012047970667481422, "logits/chosen": 1.3739960193634033, "logits/rejected": 1.4209362268447876, "logps/chosen": -1.9015439748764038, "logps/rejected": -8.278570175170898, "loss": 0.5333, "nll_loss": 0.5320584177970886, "rewards/accuracies": 1.0, "rewards/chosen": -0.19015440344810486, "rewards/margins": 0.6377025842666626, "rewards/rejected": -0.8278569579124451, "step": 6551 }, { "epoch": 17.93839835728953, "grad_norm": 3.406745195388794, "learning_rate": 1.0246575342465754e-07, "log_odds_chosen": 2.2468395233154297, "log_odds_ratio": -0.25198614597320557, "logits/chosen": 1.1783380508422852, "logits/rejected": 1.1991053819656372, "logps/chosen": -2.483931064605713, "logps/rejected": -4.625103950500488, "loss": 0.4691, "nll_loss": 0.443855881690979, "rewards/accuracies": 0.875, "rewards/chosen": -0.24839311838150024, "rewards/margins": 0.21411731839179993, "rewards/rejected": -0.4625104069709778, "step": 6552 }, { "epoch": 17.94113620807666, "grad_norm": 3.541666030883789, "learning_rate": 1.0232876712328766e-07, "log_odds_chosen": 4.741806983947754, "log_odds_ratio": -0.06828086078166962, "logits/chosen": 0.7865002155303955, "logits/rejected": 0.7371082305908203, "logps/chosen": -1.8501768112182617, "logps/rejected": -6.348429203033447, "loss": 0.6471, "nll_loss": 0.6402281522750854, "rewards/accuracies": 1.0, "rewards/chosen": -0.1850176900625229, "rewards/margins": 0.4498252868652344, "rewards/rejected": -0.6348429322242737, "step": 6553 }, { "epoch": 17.94387405886379, "grad_norm": 3.207522392272949, "learning_rate": 1.021917808219178e-07, "log_odds_chosen": 2.775949001312256, "log_odds_ratio": -0.17253205180168152, "logits/chosen": 1.1040699481964111, "logits/rejected": 1.0300424098968506, "logps/chosen": -1.3601689338684082, "logps/rejected": -3.840670108795166, "loss": 0.3869, "nll_loss": 0.3696129322052002, "rewards/accuracies": 1.0, "rewards/chosen": -0.13601690530776978, "rewards/margins": 0.24805012345314026, "rewards/rejected": -0.38406702876091003, "step": 6554 }, { "epoch": 17.946611909650922, "grad_norm": 3.3946125507354736, "learning_rate": 1.0205479452054794e-07, "log_odds_chosen": 1.932662010192871, "log_odds_ratio": -0.2755016088485718, "logits/chosen": 1.0726977586746216, "logits/rejected": 1.0889239311218262, "logps/chosen": -1.6162687540054321, "logps/rejected": -3.4144158363342285, "loss": 0.5143, "nll_loss": 0.4867495000362396, "rewards/accuracies": 1.0, "rewards/chosen": -0.16162686049938202, "rewards/margins": 0.1798146814107895, "rewards/rejected": -0.3414415419101715, "step": 6555 }, { "epoch": 17.949349760438057, "grad_norm": 4.470013618469238, "learning_rate": 1.0191780821917808e-07, "log_odds_chosen": 2.371890068054199, "log_odds_ratio": -0.1590615212917328, "logits/chosen": 1.0035539865493774, "logits/rejected": 0.9860634803771973, "logps/chosen": -1.7163145542144775, "logps/rejected": -3.8831069469451904, "loss": 0.4484, "nll_loss": 0.4325174391269684, "rewards/accuracies": 1.0, "rewards/chosen": -0.17163145542144775, "rewards/margins": 0.21667926013469696, "rewards/rejected": -0.3883107006549835, "step": 6556 }, { "epoch": 17.95208761122519, "grad_norm": 6.86106538772583, "learning_rate": 1.0178082191780822e-07, "log_odds_chosen": 2.201359272003174, "log_odds_ratio": -0.24323883652687073, "logits/chosen": 0.9207300543785095, "logits/rejected": 0.9045151472091675, "logps/chosen": -1.6447088718414307, "logps/rejected": -3.5772547721862793, "loss": 0.5643, "nll_loss": 0.5400108695030212, "rewards/accuracies": 0.875, "rewards/chosen": -0.16447089612483978, "rewards/margins": 0.19325457513332367, "rewards/rejected": -0.35772544145584106, "step": 6557 }, { "epoch": 17.95482546201232, "grad_norm": 7.134195804595947, "learning_rate": 1.0164383561643836e-07, "log_odds_chosen": 2.0868587493896484, "log_odds_ratio": -0.33840489387512207, "logits/chosen": 1.3191030025482178, "logits/rejected": 1.2615232467651367, "logps/chosen": -1.803757905960083, "logps/rejected": -3.6605448722839355, "loss": 0.5495, "nll_loss": 0.515704870223999, "rewards/accuracies": 0.75, "rewards/chosen": -0.18037578463554382, "rewards/margins": 0.18567870557308197, "rewards/rejected": -0.366054505109787, "step": 6558 }, { "epoch": 17.95756331279945, "grad_norm": 8.18244457244873, "learning_rate": 1.015068493150685e-07, "log_odds_chosen": 3.3842668533325195, "log_odds_ratio": -0.15540426969528198, "logits/chosen": 1.132873296737671, "logits/rejected": 1.1522046327590942, "logps/chosen": -1.948314905166626, "logps/rejected": -5.061055660247803, "loss": 0.5382, "nll_loss": 0.5226191282272339, "rewards/accuracies": 1.0, "rewards/chosen": -0.1948314905166626, "rewards/margins": 0.31127411127090454, "rewards/rejected": -0.5061056017875671, "step": 6559 }, { "epoch": 17.960301163586585, "grad_norm": 3.6348109245300293, "learning_rate": 1.0136986301369862e-07, "log_odds_chosen": 6.439178466796875, "log_odds_ratio": -0.06572182476520538, "logits/chosen": 1.3234411478042603, "logits/rejected": 1.3750251531600952, "logps/chosen": -2.46335506439209, "logps/rejected": -8.758480072021484, "loss": 0.6913, "nll_loss": 0.6847710609436035, "rewards/accuracies": 1.0, "rewards/chosen": -0.24633550643920898, "rewards/margins": 0.6295125484466553, "rewards/rejected": -0.8758480548858643, "step": 6560 }, { "epoch": 17.963039014373717, "grad_norm": 4.53125, "learning_rate": 1.0123287671232876e-07, "log_odds_chosen": 2.7878661155700684, "log_odds_ratio": -0.3720400929450989, "logits/chosen": 1.1675738096237183, "logits/rejected": 1.1015222072601318, "logps/chosen": -2.2079617977142334, "logps/rejected": -4.871353626251221, "loss": 0.517, "nll_loss": 0.47980085015296936, "rewards/accuracies": 0.75, "rewards/chosen": -0.22079618275165558, "rewards/margins": 0.26633918285369873, "rewards/rejected": -0.4871353507041931, "step": 6561 }, { "epoch": 17.965776865160848, "grad_norm": 4.1534576416015625, "learning_rate": 1.010958904109589e-07, "log_odds_chosen": 1.8969489336013794, "log_odds_ratio": -0.2812681496143341, "logits/chosen": 0.46703970432281494, "logits/rejected": 0.5057766437530518, "logps/chosen": -1.732264518737793, "logps/rejected": -3.4914560317993164, "loss": 0.6015, "nll_loss": 0.5734007358551025, "rewards/accuracies": 0.875, "rewards/chosen": -0.17322644591331482, "rewards/margins": 0.17591914534568787, "rewards/rejected": -0.3491455912590027, "step": 6562 }, { "epoch": 17.968514715947983, "grad_norm": 5.201061725616455, "learning_rate": 1.0095890410958902e-07, "log_odds_chosen": 1.0764847993850708, "log_odds_ratio": -0.34688901901245117, "logits/chosen": 1.1286070346832275, "logits/rejected": 1.1286269426345825, "logps/chosen": -2.11360502243042, "logps/rejected": -3.0416979789733887, "loss": 0.4173, "nll_loss": 0.3825671672821045, "rewards/accuracies": 1.0, "rewards/chosen": -0.21136051416397095, "rewards/margins": 0.0928092822432518, "rewards/rejected": -0.30416980385780334, "step": 6563 }, { "epoch": 17.971252566735114, "grad_norm": 4.202030181884766, "learning_rate": 1.0082191780821918e-07, "log_odds_chosen": 4.200023174285889, "log_odds_ratio": -0.17631472647190094, "logits/chosen": 1.0031949281692505, "logits/rejected": 1.0409069061279297, "logps/chosen": -1.7421565055847168, "logps/rejected": -5.556122779846191, "loss": 0.5911, "nll_loss": 0.5734758973121643, "rewards/accuracies": 1.0, "rewards/chosen": -0.1742156445980072, "rewards/margins": 0.381396621465683, "rewards/rejected": -0.5556122660636902, "step": 6564 }, { "epoch": 17.973990417522245, "grad_norm": 3.7945809364318848, "learning_rate": 1.0068493150684931e-07, "log_odds_chosen": 1.8537753820419312, "log_odds_ratio": -0.2290288209915161, "logits/chosen": 0.8644166588783264, "logits/rejected": 0.8333937525749207, "logps/chosen": -1.6397535800933838, "logps/rejected": -3.3045976161956787, "loss": 0.4731, "nll_loss": 0.4502338767051697, "rewards/accuracies": 1.0, "rewards/chosen": -0.16397535800933838, "rewards/margins": 0.16648438572883606, "rewards/rejected": -0.3304597735404968, "step": 6565 }, { "epoch": 17.976728268309376, "grad_norm": 3.2428863048553467, "learning_rate": 1.0054794520547945e-07, "log_odds_chosen": 2.4514267444610596, "log_odds_ratio": -0.156670942902565, "logits/chosen": 1.074164628982544, "logits/rejected": 1.0581915378570557, "logps/chosen": -1.3456180095672607, "logps/rejected": -3.5376195907592773, "loss": 0.4756, "nll_loss": 0.4599224328994751, "rewards/accuracies": 1.0, "rewards/chosen": -0.13456180691719055, "rewards/margins": 0.21920017898082733, "rewards/rejected": -0.3537620007991791, "step": 6566 }, { "epoch": 17.97946611909651, "grad_norm": 3.6640987396240234, "learning_rate": 1.0041095890410958e-07, "log_odds_chosen": 2.8137896060943604, "log_odds_ratio": -0.19834649562835693, "logits/chosen": 1.4821093082427979, "logits/rejected": 1.5296339988708496, "logps/chosen": -2.04168438911438, "logps/rejected": -4.623319149017334, "loss": 0.5553, "nll_loss": 0.5354963541030884, "rewards/accuracies": 1.0, "rewards/chosen": -0.204168438911438, "rewards/margins": 0.2581635117530823, "rewards/rejected": -0.46233195066452026, "step": 6567 }, { "epoch": 17.982203969883642, "grad_norm": 3.82413649559021, "learning_rate": 1.0027397260273972e-07, "log_odds_chosen": 1.5320961475372314, "log_odds_ratio": -0.25861087441444397, "logits/chosen": 1.1764357089996338, "logits/rejected": 1.1442128419876099, "logps/chosen": -1.9518334865570068, "logps/rejected": -3.3328232765197754, "loss": 0.5564, "nll_loss": 0.5305293202400208, "rewards/accuracies": 0.875, "rewards/chosen": -0.19518335163593292, "rewards/margins": 0.13809897005558014, "rewards/rejected": -0.33328235149383545, "step": 6568 }, { "epoch": 17.984941820670773, "grad_norm": 8.888676643371582, "learning_rate": 1.0013698630136986e-07, "log_odds_chosen": 1.85965895652771, "log_odds_ratio": -0.3917580246925354, "logits/chosen": 1.1224859952926636, "logits/rejected": 1.0963720083236694, "logps/chosen": -2.537947177886963, "logps/rejected": -4.300362586975098, "loss": 0.6696, "nll_loss": 0.6303901672363281, "rewards/accuracies": 0.875, "rewards/chosen": -0.25379472970962524, "rewards/margins": 0.17624150216579437, "rewards/rejected": -0.43003618717193604, "step": 6569 }, { "epoch": 17.987679671457904, "grad_norm": 3.261791467666626, "learning_rate": 1e-07, "log_odds_chosen": 3.1321375370025635, "log_odds_ratio": -0.18140950798988342, "logits/chosen": 1.2925546169281006, "logits/rejected": 1.3019366264343262, "logps/chosen": -1.7879774570465088, "logps/rejected": -4.7126078605651855, "loss": 0.503, "nll_loss": 0.484836220741272, "rewards/accuracies": 1.0, "rewards/chosen": -0.17879775166511536, "rewards/margins": 0.2924630641937256, "rewards/rejected": -0.47126078605651855, "step": 6570 }, { "epoch": 17.99041752224504, "grad_norm": 8.68612289428711, "learning_rate": 9.986301369863013e-08, "log_odds_chosen": 2.256284713745117, "log_odds_ratio": -0.5454277992248535, "logits/chosen": 0.9917638301849365, "logits/rejected": 0.9792876243591309, "logps/chosen": -2.2683420181274414, "logps/rejected": -4.412152290344238, "loss": 0.597, "nll_loss": 0.5424604415893555, "rewards/accuracies": 0.75, "rewards/chosen": -0.226834237575531, "rewards/margins": 0.21438102424144745, "rewards/rejected": -0.44121524691581726, "step": 6571 }, { "epoch": 17.99315537303217, "grad_norm": 4.476511001586914, "learning_rate": 9.972602739726027e-08, "log_odds_chosen": 1.7202316522598267, "log_odds_ratio": -0.25280728936195374, "logits/chosen": 1.0627154111862183, "logits/rejected": 1.1193052530288696, "logps/chosen": -2.344999313354492, "logps/rejected": -3.914550304412842, "loss": 0.5537, "nll_loss": 0.5284560322761536, "rewards/accuracies": 0.875, "rewards/chosen": -0.23449993133544922, "rewards/margins": 0.15695512294769287, "rewards/rejected": -0.3914550542831421, "step": 6572 }, { "epoch": 17.9958932238193, "grad_norm": 3.511881113052368, "learning_rate": 9.95890410958904e-08, "log_odds_chosen": 1.944881558418274, "log_odds_ratio": -0.2906126379966736, "logits/chosen": 1.4938836097717285, "logits/rejected": 1.45505952835083, "logps/chosen": -1.4335590600967407, "logps/rejected": -3.197880268096924, "loss": 0.4384, "nll_loss": 0.4093356728553772, "rewards/accuracies": 1.0, "rewards/chosen": -0.14335590600967407, "rewards/margins": 0.17643213272094727, "rewards/rejected": -0.31978803873062134, "step": 6573 }, { "epoch": 17.998631074606433, "grad_norm": 3.440805673599243, "learning_rate": 9.945205479452054e-08, "log_odds_chosen": 3.7434539794921875, "log_odds_ratio": -0.13721807301044464, "logits/chosen": 1.0780904293060303, "logits/rejected": 1.017132043838501, "logps/chosen": -1.623326301574707, "logps/rejected": -5.154523849487305, "loss": 0.4736, "nll_loss": 0.45987004041671753, "rewards/accuracies": 1.0, "rewards/chosen": -0.1623326539993286, "rewards/margins": 0.35311976075172424, "rewards/rejected": -0.5154524445533752, "step": 6574 }, { "epoch": 18.001368925393567, "grad_norm": 4.676530361175537, "learning_rate": 9.931506849315068e-08, "log_odds_chosen": 1.0172932147979736, "log_odds_ratio": -0.32243141531944275, "logits/chosen": 1.1806637048721313, "logits/rejected": 1.139808177947998, "logps/chosen": -1.6881650686264038, "logps/rejected": -2.5334794521331787, "loss": 0.6634, "nll_loss": 0.6311550736427307, "rewards/accuracies": 1.0, "rewards/chosen": -0.16881652176380157, "rewards/margins": 0.08453144133090973, "rewards/rejected": -0.2533479332923889, "step": 6575 }, { "epoch": 18.0041067761807, "grad_norm": 12.362318992614746, "learning_rate": 9.917808219178083e-08, "log_odds_chosen": 1.887332797050476, "log_odds_ratio": -0.4563697576522827, "logits/chosen": 0.9853579998016357, "logits/rejected": 0.9778141379356384, "logps/chosen": -2.186465263366699, "logps/rejected": -3.766449451446533, "loss": 0.5393, "nll_loss": 0.4936607778072357, "rewards/accuracies": 0.875, "rewards/chosen": -0.21864654123783112, "rewards/margins": 0.15799841284751892, "rewards/rejected": -0.37664493918418884, "step": 6576 }, { "epoch": 18.00684462696783, "grad_norm": 3.642930030822754, "learning_rate": 9.904109589041095e-08, "log_odds_chosen": 1.2295777797698975, "log_odds_ratio": -0.3399355113506317, "logits/chosen": 1.0922160148620605, "logits/rejected": 1.0369253158569336, "logps/chosen": -2.2984278202056885, "logps/rejected": -3.360182523727417, "loss": 0.4614, "nll_loss": 0.42737606167793274, "rewards/accuracies": 0.875, "rewards/chosen": -0.22984275221824646, "rewards/margins": 0.1061754822731018, "rewards/rejected": -0.33601826429367065, "step": 6577 }, { "epoch": 18.00958247775496, "grad_norm": 3.3997671604156494, "learning_rate": 9.890410958904109e-08, "log_odds_chosen": 4.603909492492676, "log_odds_ratio": -0.22222685813903809, "logits/chosen": 1.3104503154754639, "logits/rejected": 1.2968345880508423, "logps/chosen": -2.182156801223755, "logps/rejected": -6.681896209716797, "loss": 0.613, "nll_loss": 0.5907477736473083, "rewards/accuracies": 0.875, "rewards/chosen": -0.2182157039642334, "rewards/margins": 0.4499739110469818, "rewards/rejected": -0.6681896448135376, "step": 6578 }, { "epoch": 18.012320328542096, "grad_norm": 3.516526222229004, "learning_rate": 9.876712328767123e-08, "log_odds_chosen": 3.1799445152282715, "log_odds_ratio": -0.21925362944602966, "logits/chosen": 1.2010345458984375, "logits/rejected": 1.1081726551055908, "logps/chosen": -1.2507894039154053, "logps/rejected": -4.081127166748047, "loss": 0.4301, "nll_loss": 0.40817689895629883, "rewards/accuracies": 1.0, "rewards/chosen": -0.125078946352005, "rewards/margins": 0.2830337882041931, "rewards/rejected": -0.40811270475387573, "step": 6579 }, { "epoch": 18.015058179329227, "grad_norm": 3.610318183898926, "learning_rate": 9.863013698630136e-08, "log_odds_chosen": 2.965123176574707, "log_odds_ratio": -0.2508542537689209, "logits/chosen": 0.8601052165031433, "logits/rejected": 0.7844486236572266, "logps/chosen": -1.4253641366958618, "logps/rejected": -4.213809490203857, "loss": 0.4845, "nll_loss": 0.4594075679779053, "rewards/accuracies": 0.875, "rewards/chosen": -0.14253643155097961, "rewards/margins": 0.2788445055484772, "rewards/rejected": -0.4213809370994568, "step": 6580 }, { "epoch": 18.017796030116358, "grad_norm": 3.286165237426758, "learning_rate": 9.84931506849315e-08, "log_odds_chosen": 2.867794990539551, "log_odds_ratio": -0.2035095989704132, "logits/chosen": 1.228415846824646, "logits/rejected": 1.2190499305725098, "logps/chosen": -1.5058605670928955, "logps/rejected": -4.149073123931885, "loss": 0.4234, "nll_loss": 0.4030027985572815, "rewards/accuracies": 1.0, "rewards/chosen": -0.1505860537290573, "rewards/margins": 0.2643212676048279, "rewards/rejected": -0.4149073362350464, "step": 6581 }, { "epoch": 18.02053388090349, "grad_norm": 3.1135125160217285, "learning_rate": 9.835616438356165e-08, "log_odds_chosen": 3.768533706665039, "log_odds_ratio": -0.13486053049564362, "logits/chosen": 1.1933963298797607, "logits/rejected": 1.2412946224212646, "logps/chosen": -2.0149693489074707, "logps/rejected": -5.604914665222168, "loss": 0.5371, "nll_loss": 0.5236226320266724, "rewards/accuracies": 1.0, "rewards/chosen": -0.2014969438314438, "rewards/margins": 0.3589945137500763, "rewards/rejected": -0.5604914426803589, "step": 6582 }, { "epoch": 18.023271731690624, "grad_norm": 4.594855785369873, "learning_rate": 9.821917808219179e-08, "log_odds_chosen": 2.6769261360168457, "log_odds_ratio": -0.2479388415813446, "logits/chosen": 1.201809287071228, "logits/rejected": 1.1850135326385498, "logps/chosen": -1.8773744106292725, "logps/rejected": -4.348342418670654, "loss": 0.4585, "nll_loss": 0.43370479345321655, "rewards/accuracies": 1.0, "rewards/chosen": -0.18773746490478516, "rewards/margins": 0.24709677696228027, "rewards/rejected": -0.43483424186706543, "step": 6583 }, { "epoch": 18.026009582477755, "grad_norm": 10.347081184387207, "learning_rate": 9.808219178082191e-08, "log_odds_chosen": 0.16779398918151855, "log_odds_ratio": -0.7215436697006226, "logits/chosen": 1.0203571319580078, "logits/rejected": 0.9185539484024048, "logps/chosen": -2.696547746658325, "logps/rejected": -2.8030834197998047, "loss": 0.5763, "nll_loss": 0.5041686296463013, "rewards/accuracies": 0.625, "rewards/chosen": -0.269654780626297, "rewards/margins": 0.010653559118509293, "rewards/rejected": -0.280308336019516, "step": 6584 }, { "epoch": 18.028747433264886, "grad_norm": 4.340653896331787, "learning_rate": 9.794520547945205e-08, "log_odds_chosen": 2.9550905227661133, "log_odds_ratio": -0.1934814453125, "logits/chosen": 1.0535016059875488, "logits/rejected": 1.04897940158844, "logps/chosen": -2.6134514808654785, "logps/rejected": -5.464562892913818, "loss": 0.5852, "nll_loss": 0.5658913850784302, "rewards/accuracies": 1.0, "rewards/chosen": -0.26134517788887024, "rewards/margins": 0.28511112928390503, "rewards/rejected": -0.5464563369750977, "step": 6585 }, { "epoch": 18.031485284052017, "grad_norm": 10.597186088562012, "learning_rate": 9.780821917808219e-08, "log_odds_chosen": 1.5994620323181152, "log_odds_ratio": -0.665728747844696, "logits/chosen": 0.9440476894378662, "logits/rejected": 0.9191900491714478, "logps/chosen": -2.4104676246643066, "logps/rejected": -3.909487247467041, "loss": 0.5691, "nll_loss": 0.5025107264518738, "rewards/accuracies": 0.875, "rewards/chosen": -0.24104677140712738, "rewards/margins": 0.14990195631980896, "rewards/rejected": -0.3909487724304199, "step": 6586 }, { "epoch": 18.034223134839152, "grad_norm": 4.263643741607666, "learning_rate": 9.767123287671232e-08, "log_odds_chosen": 2.8258626461029053, "log_odds_ratio": -0.11614919453859329, "logits/chosen": 1.1170490980148315, "logits/rejected": 1.1533539295196533, "logps/chosen": -2.46541166305542, "logps/rejected": -5.176960468292236, "loss": 0.5004, "nll_loss": 0.4888246953487396, "rewards/accuracies": 1.0, "rewards/chosen": -0.24654117226600647, "rewards/margins": 0.27115491032600403, "rewards/rejected": -0.5176960825920105, "step": 6587 }, { "epoch": 18.036960985626283, "grad_norm": 7.123854637145996, "learning_rate": 9.753424657534245e-08, "log_odds_chosen": 4.828310966491699, "log_odds_ratio": -0.34791889786720276, "logits/chosen": 1.1938599348068237, "logits/rejected": 1.1983119249343872, "logps/chosen": -2.3392882347106934, "logps/rejected": -6.939566612243652, "loss": 0.5718, "nll_loss": 0.5370076894760132, "rewards/accuracies": 0.875, "rewards/chosen": -0.2339288294315338, "rewards/margins": 0.4600277841091156, "rewards/rejected": -0.6939566135406494, "step": 6588 }, { "epoch": 18.039698836413415, "grad_norm": 2.875577211380005, "learning_rate": 9.739726027397261e-08, "log_odds_chosen": 3.123722553253174, "log_odds_ratio": -0.1344095766544342, "logits/chosen": 1.31194007396698, "logits/rejected": 1.2687464952468872, "logps/chosen": -1.5169668197631836, "logps/rejected": -4.388646125793457, "loss": 0.5272, "nll_loss": 0.5137274265289307, "rewards/accuracies": 1.0, "rewards/chosen": -0.15169668197631836, "rewards/margins": 0.2871679961681366, "rewards/rejected": -0.43886464834213257, "step": 6589 }, { "epoch": 18.04243668720055, "grad_norm": 3.422637701034546, "learning_rate": 9.726027397260273e-08, "log_odds_chosen": 3.1465036869049072, "log_odds_ratio": -0.11879618465900421, "logits/chosen": 1.1375881433486938, "logits/rejected": 1.0917482376098633, "logps/chosen": -1.36288583278656, "logps/rejected": -4.236392974853516, "loss": 0.4221, "nll_loss": 0.4102029502391815, "rewards/accuracies": 1.0, "rewards/chosen": -0.136288583278656, "rewards/margins": 0.28735068440437317, "rewards/rejected": -0.4236392676830292, "step": 6590 }, { "epoch": 18.04517453798768, "grad_norm": 6.218414306640625, "learning_rate": 9.712328767123287e-08, "log_odds_chosen": 1.9109410047531128, "log_odds_ratio": -0.39166688919067383, "logits/chosen": 1.1856247186660767, "logits/rejected": 1.2339869737625122, "logps/chosen": -2.2687857151031494, "logps/rejected": -4.007926940917969, "loss": 0.5636, "nll_loss": 0.5244314670562744, "rewards/accuracies": 0.75, "rewards/chosen": -0.2268785834312439, "rewards/margins": 0.1739141047000885, "rewards/rejected": -0.4007927179336548, "step": 6591 }, { "epoch": 18.04791238877481, "grad_norm": 9.679457664489746, "learning_rate": 9.698630136986301e-08, "log_odds_chosen": 2.3089077472686768, "log_odds_ratio": -0.44759970903396606, "logits/chosen": 1.3827638626098633, "logits/rejected": 1.4195621013641357, "logps/chosen": -2.7203683853149414, "logps/rejected": -4.91224479675293, "loss": 0.6697, "nll_loss": 0.6249779462814331, "rewards/accuracies": 0.875, "rewards/chosen": -0.2720368504524231, "rewards/margins": 0.2191876769065857, "rewards/rejected": -0.4912245571613312, "step": 6592 }, { "epoch": 18.050650239561943, "grad_norm": 6.985458850860596, "learning_rate": 9.684931506849315e-08, "log_odds_chosen": 3.4993014335632324, "log_odds_ratio": -0.16054151952266693, "logits/chosen": 1.2601374387741089, "logits/rejected": 1.2761874198913574, "logps/chosen": -2.3873209953308105, "logps/rejected": -5.730079650878906, "loss": 0.6424, "nll_loss": 0.6263703107833862, "rewards/accuracies": 1.0, "rewards/chosen": -0.23873209953308105, "rewards/margins": 0.33427584171295166, "rewards/rejected": -0.5730079412460327, "step": 6593 }, { "epoch": 18.053388090349078, "grad_norm": 3.5600247383117676, "learning_rate": 9.671232876712327e-08, "log_odds_chosen": 3.4380247592926025, "log_odds_ratio": -0.17802810668945312, "logits/chosen": 0.9468740820884705, "logits/rejected": 0.9088326692581177, "logps/chosen": -1.862908124923706, "logps/rejected": -5.109984874725342, "loss": 0.5135, "nll_loss": 0.49569645524024963, "rewards/accuracies": 1.0, "rewards/chosen": -0.18629083037376404, "rewards/margins": 0.32470768690109253, "rewards/rejected": -0.5109984874725342, "step": 6594 }, { "epoch": 18.05612594113621, "grad_norm": 3.6613454818725586, "learning_rate": 9.657534246575343e-08, "log_odds_chosen": 3.5081558227539062, "log_odds_ratio": -0.22084647417068481, "logits/chosen": 0.9240928888320923, "logits/rejected": 0.9401136636734009, "logps/chosen": -1.7209954261779785, "logps/rejected": -5.020930767059326, "loss": 0.6157, "nll_loss": 0.5935760736465454, "rewards/accuracies": 0.875, "rewards/chosen": -0.1720995455980301, "rewards/margins": 0.32999348640441895, "rewards/rejected": -0.5020930767059326, "step": 6595 }, { "epoch": 18.05886379192334, "grad_norm": 4.505937099456787, "learning_rate": 9.643835616438357e-08, "log_odds_chosen": 2.344937324523926, "log_odds_ratio": -0.12484914064407349, "logits/chosen": 1.1010428667068481, "logits/rejected": 1.1913633346557617, "logps/chosen": -2.6645896434783936, "logps/rejected": -4.915960311889648, "loss": 0.5901, "nll_loss": 0.5776523351669312, "rewards/accuracies": 1.0, "rewards/chosen": -0.2664589583873749, "rewards/margins": 0.22513708472251892, "rewards/rejected": -0.4915960431098938, "step": 6596 }, { "epoch": 18.06160164271047, "grad_norm": 3.860891342163086, "learning_rate": 9.630136986301369e-08, "log_odds_chosen": 2.276020050048828, "log_odds_ratio": -0.3182803690433502, "logits/chosen": 1.0433346033096313, "logits/rejected": 1.007119059562683, "logps/chosen": -1.862055778503418, "logps/rejected": -4.035100936889648, "loss": 0.5013, "nll_loss": 0.4694242477416992, "rewards/accuracies": 0.875, "rewards/chosen": -0.18620556592941284, "rewards/margins": 0.2173045128583908, "rewards/rejected": -0.40351009368896484, "step": 6597 }, { "epoch": 18.064339493497606, "grad_norm": 3.6805880069732666, "learning_rate": 9.616438356164383e-08, "log_odds_chosen": 4.454319477081299, "log_odds_ratio": -0.051561783999204636, "logits/chosen": 0.9566609859466553, "logits/rejected": 0.9328278303146362, "logps/chosen": -1.711838722229004, "logps/rejected": -5.9564313888549805, "loss": 0.4709, "nll_loss": 0.46571239829063416, "rewards/accuracies": 1.0, "rewards/chosen": -0.17118388414382935, "rewards/margins": 0.42445921897888184, "rewards/rejected": -0.5956431031227112, "step": 6598 }, { "epoch": 18.067077344284737, "grad_norm": 4.831991195678711, "learning_rate": 9.602739726027397e-08, "log_odds_chosen": 1.794360876083374, "log_odds_ratio": -0.3454221487045288, "logits/chosen": 1.4347243309020996, "logits/rejected": 1.4147437810897827, "logps/chosen": -2.3464131355285645, "logps/rejected": -4.055089473724365, "loss": 0.5599, "nll_loss": 0.5253125429153442, "rewards/accuracies": 0.75, "rewards/chosen": -0.23464131355285645, "rewards/margins": 0.1708676666021347, "rewards/rejected": -0.40550896525382996, "step": 6599 }, { "epoch": 18.069815195071868, "grad_norm": 3.4662115573883057, "learning_rate": 9.58904109589041e-08, "log_odds_chosen": 4.382922649383545, "log_odds_ratio": -0.17546647787094116, "logits/chosen": 1.1119507551193237, "logits/rejected": 1.1262423992156982, "logps/chosen": -2.11665415763855, "logps/rejected": -6.249993801116943, "loss": 0.482, "nll_loss": 0.4644882380962372, "rewards/accuracies": 1.0, "rewards/chosen": -0.21166542172431946, "rewards/margins": 0.4133339822292328, "rewards/rejected": -0.6249994039535522, "step": 6600 }, { "epoch": 18.072553045859, "grad_norm": 4.154529094696045, "learning_rate": 9.575342465753425e-08, "log_odds_chosen": 1.7705018520355225, "log_odds_ratio": -0.3177734911441803, "logits/chosen": 1.009055256843567, "logits/rejected": 0.9663729071617126, "logps/chosen": -1.7020189762115479, "logps/rejected": -3.347525119781494, "loss": 0.4452, "nll_loss": 0.41344696283340454, "rewards/accuracies": 0.875, "rewards/chosen": -0.17020189762115479, "rewards/margins": 0.16455061733722687, "rewards/rejected": -0.33475250005722046, "step": 6601 }, { "epoch": 18.075290896646134, "grad_norm": 4.0115861892700195, "learning_rate": 9.561643835616439e-08, "log_odds_chosen": 1.637784481048584, "log_odds_ratio": -0.2795557379722595, "logits/chosen": 1.1399577856063843, "logits/rejected": 1.0915327072143555, "logps/chosen": -2.147303819656372, "logps/rejected": -3.6880035400390625, "loss": 0.6224, "nll_loss": 0.5944336652755737, "rewards/accuracies": 1.0, "rewards/chosen": -0.2147303819656372, "rewards/margins": 0.15406998991966248, "rewards/rejected": -0.36880040168762207, "step": 6602 }, { "epoch": 18.078028747433265, "grad_norm": 3.6450448036193848, "learning_rate": 9.547945205479452e-08, "log_odds_chosen": 3.189548969268799, "log_odds_ratio": -0.07689958065748215, "logits/chosen": 0.9901312589645386, "logits/rejected": 0.9917969703674316, "logps/chosen": -2.1360833644866943, "logps/rejected": -5.146675109863281, "loss": 0.5394, "nll_loss": 0.531745433807373, "rewards/accuracies": 1.0, "rewards/chosen": -0.21360835433006287, "rewards/margins": 0.30105918645858765, "rewards/rejected": -0.5146675109863281, "step": 6603 }, { "epoch": 18.080766598220396, "grad_norm": 3.4193828105926514, "learning_rate": 9.534246575342465e-08, "log_odds_chosen": 3.628572463989258, "log_odds_ratio": -0.13259290158748627, "logits/chosen": 1.2052669525146484, "logits/rejected": 1.1875007152557373, "logps/chosen": -1.3135459423065186, "logps/rejected": -4.613256454467773, "loss": 0.4669, "nll_loss": 0.4536649286746979, "rewards/accuracies": 1.0, "rewards/chosen": -0.13135458528995514, "rewards/margins": 0.3299710750579834, "rewards/rejected": -0.46132567524909973, "step": 6604 }, { "epoch": 18.083504449007528, "grad_norm": 3.568598985671997, "learning_rate": 9.520547945205479e-08, "log_odds_chosen": 2.6115801334381104, "log_odds_ratio": -0.16726930439472198, "logits/chosen": 1.2070677280426025, "logits/rejected": 1.2324237823486328, "logps/chosen": -1.6244853734970093, "logps/rejected": -3.9916372299194336, "loss": 0.4633, "nll_loss": 0.446584552526474, "rewards/accuracies": 1.0, "rewards/chosen": -0.16244854032993317, "rewards/margins": 0.2367151975631714, "rewards/rejected": -0.39916372299194336, "step": 6605 }, { "epoch": 18.086242299794662, "grad_norm": 3.4362757205963135, "learning_rate": 9.506849315068493e-08, "log_odds_chosen": 2.0937955379486084, "log_odds_ratio": -0.19218464195728302, "logits/chosen": 0.931630551815033, "logits/rejected": 0.9058055877685547, "logps/chosen": -1.7852022647857666, "logps/rejected": -3.7267632484436035, "loss": 0.5683, "nll_loss": 0.5490453243255615, "rewards/accuracies": 1.0, "rewards/chosen": -0.17852023243904114, "rewards/margins": 0.19415611028671265, "rewards/rejected": -0.3726763129234314, "step": 6606 }, { "epoch": 18.088980150581794, "grad_norm": 3.7758984565734863, "learning_rate": 9.493150684931505e-08, "log_odds_chosen": 2.6022047996520996, "log_odds_ratio": -0.22007623314857483, "logits/chosen": 0.8969740271568298, "logits/rejected": 0.8507148027420044, "logps/chosen": -2.194100856781006, "logps/rejected": -4.651302337646484, "loss": 0.5226, "nll_loss": 0.5005787014961243, "rewards/accuracies": 1.0, "rewards/chosen": -0.21941007673740387, "rewards/margins": 0.24572016298770905, "rewards/rejected": -0.4651302695274353, "step": 6607 }, { "epoch": 18.091718001368925, "grad_norm": 3.3210504055023193, "learning_rate": 9.47945205479452e-08, "log_odds_chosen": 3.081327438354492, "log_odds_ratio": -0.21100279688835144, "logits/chosen": 1.1091530323028564, "logits/rejected": 1.0751068592071533, "logps/chosen": -1.1210269927978516, "logps/rejected": -3.857325315475464, "loss": 0.515, "nll_loss": 0.4938797652721405, "rewards/accuracies": 1.0, "rewards/chosen": -0.1121027022600174, "rewards/margins": 0.2736298441886902, "rewards/rejected": -0.3857325315475464, "step": 6608 }, { "epoch": 18.094455852156056, "grad_norm": 4.577727794647217, "learning_rate": 9.465753424657534e-08, "log_odds_chosen": 1.3249294757843018, "log_odds_ratio": -0.4459906220436096, "logits/chosen": 1.1810804605484009, "logits/rejected": 1.1307282447814941, "logps/chosen": -2.497354030609131, "logps/rejected": -3.6984364986419678, "loss": 0.5601, "nll_loss": 0.5154992938041687, "rewards/accuracies": 0.875, "rewards/chosen": -0.24973538517951965, "rewards/margins": 0.12010826915502548, "rewards/rejected": -0.3698436915874481, "step": 6609 }, { "epoch": 18.09719370294319, "grad_norm": 3.5768814086914062, "learning_rate": 9.452054794520547e-08, "log_odds_chosen": 1.4312708377838135, "log_odds_ratio": -0.32121703028678894, "logits/chosen": 1.0815443992614746, "logits/rejected": 1.0508054494857788, "logps/chosen": -1.8380630016326904, "logps/rejected": -3.1558337211608887, "loss": 0.51, "nll_loss": 0.477839857339859, "rewards/accuracies": 1.0, "rewards/chosen": -0.18380630016326904, "rewards/margins": 0.1317770928144455, "rewards/rejected": -0.31558337807655334, "step": 6610 }, { "epoch": 18.099931553730322, "grad_norm": 3.6642775535583496, "learning_rate": 9.438356164383561e-08, "log_odds_chosen": 6.073573589324951, "log_odds_ratio": -0.09356918931007385, "logits/chosen": 1.3351460695266724, "logits/rejected": 1.3795546293258667, "logps/chosen": -2.22273588180542, "logps/rejected": -8.12858772277832, "loss": 0.5517, "nll_loss": 0.5423453450202942, "rewards/accuracies": 1.0, "rewards/chosen": -0.22227360308170319, "rewards/margins": 0.5905852317810059, "rewards/rejected": -0.8128587603569031, "step": 6611 }, { "epoch": 18.102669404517453, "grad_norm": 5.742891311645508, "learning_rate": 9.424657534246575e-08, "log_odds_chosen": 2.651212215423584, "log_odds_ratio": -0.4042767286300659, "logits/chosen": 1.135606288909912, "logits/rejected": 1.2709033489227295, "logps/chosen": -2.2490053176879883, "logps/rejected": -4.776344299316406, "loss": 0.5777, "nll_loss": 0.5372393131256104, "rewards/accuracies": 0.75, "rewards/chosen": -0.2249005287885666, "rewards/margins": 0.25273388624191284, "rewards/rejected": -0.47763440012931824, "step": 6612 }, { "epoch": 18.105407255304584, "grad_norm": 3.9437103271484375, "learning_rate": 9.410958904109589e-08, "log_odds_chosen": 1.9436148405075073, "log_odds_ratio": -0.2903059422969818, "logits/chosen": 0.8990000486373901, "logits/rejected": 0.9345971941947937, "logps/chosen": -2.221515417098999, "logps/rejected": -4.03295373916626, "loss": 0.516, "nll_loss": 0.48691946268081665, "rewards/accuracies": 1.0, "rewards/chosen": -0.22215156257152557, "rewards/margins": 0.18114382028579712, "rewards/rejected": -0.4032953977584839, "step": 6613 }, { "epoch": 18.10814510609172, "grad_norm": 3.7108161449432373, "learning_rate": 9.397260273972603e-08, "log_odds_chosen": 2.1770763397216797, "log_odds_ratio": -0.22013823688030243, "logits/chosen": 1.1329970359802246, "logits/rejected": 1.109740972518921, "logps/chosen": -1.5619959831237793, "logps/rejected": -3.5302319526672363, "loss": 0.4635, "nll_loss": 0.44150012731552124, "rewards/accuracies": 1.0, "rewards/chosen": -0.1561996042728424, "rewards/margins": 0.1968235969543457, "rewards/rejected": -0.3530231714248657, "step": 6614 }, { "epoch": 18.11088295687885, "grad_norm": 3.7155771255493164, "learning_rate": 9.383561643835616e-08, "log_odds_chosen": 3.1232824325561523, "log_odds_ratio": -0.2800736129283905, "logits/chosen": 1.0734213590621948, "logits/rejected": 1.0258358716964722, "logps/chosen": -2.0268375873565674, "logps/rejected": -5.009203910827637, "loss": 0.5653, "nll_loss": 0.5373095870018005, "rewards/accuracies": 0.875, "rewards/chosen": -0.20268377661705017, "rewards/margins": 0.2982366681098938, "rewards/rejected": -0.5009204149246216, "step": 6615 }, { "epoch": 18.11362080766598, "grad_norm": 3.688838481903076, "learning_rate": 9.36986301369863e-08, "log_odds_chosen": 3.9981577396392822, "log_odds_ratio": -0.12101484835147858, "logits/chosen": 1.1676669120788574, "logits/rejected": 1.1763029098510742, "logps/chosen": -1.4414992332458496, "logps/rejected": -5.1302900314331055, "loss": 0.5591, "nll_loss": 0.5469722151756287, "rewards/accuracies": 1.0, "rewards/chosen": -0.14414992928504944, "rewards/margins": 0.3688790798187256, "rewards/rejected": -0.5130289793014526, "step": 6616 }, { "epoch": 18.116358658453116, "grad_norm": 13.443986892700195, "learning_rate": 9.356164383561643e-08, "log_odds_chosen": 1.7014256715774536, "log_odds_ratio": -0.7452988624572754, "logits/chosen": 0.9674469232559204, "logits/rejected": 0.9358233213424683, "logps/chosen": -3.0704355239868164, "logps/rejected": -4.616765975952148, "loss": 0.6169, "nll_loss": 0.5423620939254761, "rewards/accuracies": 0.875, "rewards/chosen": -0.30704355239868164, "rewards/margins": 0.1546330451965332, "rewards/rejected": -0.46167659759521484, "step": 6617 }, { "epoch": 18.119096509240247, "grad_norm": 3.4470927715301514, "learning_rate": 9.342465753424657e-08, "log_odds_chosen": 3.551196813583374, "log_odds_ratio": -0.18734250962734222, "logits/chosen": 1.3772780895233154, "logits/rejected": 1.3677747249603271, "logps/chosen": -1.7282501459121704, "logps/rejected": -5.09576416015625, "loss": 0.5358, "nll_loss": 0.5170513391494751, "rewards/accuracies": 1.0, "rewards/chosen": -0.17282502353191376, "rewards/margins": 0.33675137162208557, "rewards/rejected": -0.5095763802528381, "step": 6618 }, { "epoch": 18.12183436002738, "grad_norm": 5.941826343536377, "learning_rate": 9.32876712328767e-08, "log_odds_chosen": 3.2583179473876953, "log_odds_ratio": -0.3257814347743988, "logits/chosen": 1.0969254970550537, "logits/rejected": 1.1107709407806396, "logps/chosen": -1.7185053825378418, "logps/rejected": -4.698395252227783, "loss": 0.5835, "nll_loss": 0.5509443879127502, "rewards/accuracies": 0.875, "rewards/chosen": -0.1718505322933197, "rewards/margins": 0.29798901081085205, "rewards/rejected": -0.46983954310417175, "step": 6619 }, { "epoch": 18.12457221081451, "grad_norm": 3.5071170330047607, "learning_rate": 9.315068493150684e-08, "log_odds_chosen": 2.199894666671753, "log_odds_ratio": -0.19326576590538025, "logits/chosen": 1.0670865774154663, "logits/rejected": 1.103553295135498, "logps/chosen": -1.8490028381347656, "logps/rejected": -3.877639055252075, "loss": 0.5077, "nll_loss": 0.4883369505405426, "rewards/accuracies": 1.0, "rewards/chosen": -0.18490028381347656, "rewards/margins": 0.2028636336326599, "rewards/rejected": -0.3877639174461365, "step": 6620 }, { "epoch": 18.127310061601644, "grad_norm": 3.808718681335449, "learning_rate": 9.301369863013698e-08, "log_odds_chosen": 2.331704616546631, "log_odds_ratio": -0.3033003509044647, "logits/chosen": 1.0187948942184448, "logits/rejected": 1.0096263885498047, "logps/chosen": -1.8480331897735596, "logps/rejected": -4.0269598960876465, "loss": 0.4937, "nll_loss": 0.4633222818374634, "rewards/accuracies": 1.0, "rewards/chosen": -0.1848033219575882, "rewards/margins": 0.21789267659187317, "rewards/rejected": -0.40269601345062256, "step": 6621 }, { "epoch": 18.130047912388775, "grad_norm": 3.529884099960327, "learning_rate": 9.287671232876712e-08, "log_odds_chosen": 2.9637808799743652, "log_odds_ratio": -0.18187788128852844, "logits/chosen": 1.0167399644851685, "logits/rejected": 1.0336565971374512, "logps/chosen": -1.9365458488464355, "logps/rejected": -4.685613632202148, "loss": 0.4793, "nll_loss": 0.461142897605896, "rewards/accuracies": 1.0, "rewards/chosen": -0.19365458190441132, "rewards/margins": 0.2749067544937134, "rewards/rejected": -0.4685613512992859, "step": 6622 }, { "epoch": 18.132785763175907, "grad_norm": 4.365805625915527, "learning_rate": 9.273972602739726e-08, "log_odds_chosen": 5.094415664672852, "log_odds_ratio": -0.13801352679729462, "logits/chosen": 1.1911132335662842, "logits/rejected": 1.1580532789230347, "logps/chosen": -2.075222969055176, "logps/rejected": -6.993641376495361, "loss": 0.5226, "nll_loss": 0.5087937116622925, "rewards/accuracies": 0.875, "rewards/chosen": -0.20752231776714325, "rewards/margins": 0.4918419122695923, "rewards/rejected": -0.699364185333252, "step": 6623 }, { "epoch": 18.135523613963038, "grad_norm": 4.245298385620117, "learning_rate": 9.260273972602739e-08, "log_odds_chosen": 2.8053016662597656, "log_odds_ratio": -0.23104660212993622, "logits/chosen": 1.0567364692687988, "logits/rejected": 1.0734702348709106, "logps/chosen": -1.6781057119369507, "logps/rejected": -4.293738842010498, "loss": 0.5806, "nll_loss": 0.5575276613235474, "rewards/accuracies": 0.875, "rewards/chosen": -0.1678105592727661, "rewards/margins": 0.2615633010864258, "rewards/rejected": -0.4293738603591919, "step": 6624 }, { "epoch": 18.138261464750173, "grad_norm": 3.295085906982422, "learning_rate": 9.246575342465753e-08, "log_odds_chosen": 5.297828674316406, "log_odds_ratio": -0.19767670333385468, "logits/chosen": 1.192652702331543, "logits/rejected": 1.1668479442596436, "logps/chosen": -1.6343986988067627, "logps/rejected": -6.719639301300049, "loss": 0.7126, "nll_loss": 0.6928685307502747, "rewards/accuracies": 1.0, "rewards/chosen": -0.16343988478183746, "rewards/margins": 0.5085240602493286, "rewards/rejected": -0.6719639301300049, "step": 6625 }, { "epoch": 18.140999315537304, "grad_norm": 3.6960902214050293, "learning_rate": 9.232876712328768e-08, "log_odds_chosen": 4.322554111480713, "log_odds_ratio": -0.03984585776925087, "logits/chosen": 1.1172070503234863, "logits/rejected": 1.086661696434021, "logps/chosen": -1.842025876045227, "logps/rejected": -5.938717842102051, "loss": 0.5073, "nll_loss": 0.5032727122306824, "rewards/accuracies": 1.0, "rewards/chosen": -0.18420258164405823, "rewards/margins": 0.4096692204475403, "rewards/rejected": -0.5938717722892761, "step": 6626 }, { "epoch": 18.143737166324435, "grad_norm": 8.983904838562012, "learning_rate": 9.21917808219178e-08, "log_odds_chosen": 2.678372383117676, "log_odds_ratio": -0.30025598406791687, "logits/chosen": 1.240098237991333, "logits/rejected": 1.2536625862121582, "logps/chosen": -2.054962158203125, "logps/rejected": -4.611769199371338, "loss": 0.5867, "nll_loss": 0.5566883087158203, "rewards/accuracies": 0.75, "rewards/chosen": -0.20549622178077698, "rewards/margins": 0.25568071007728577, "rewards/rejected": -0.46117693185806274, "step": 6627 }, { "epoch": 18.146475017111566, "grad_norm": 4.695934295654297, "learning_rate": 9.205479452054794e-08, "log_odds_chosen": 2.9135043621063232, "log_odds_ratio": -0.531989574432373, "logits/chosen": 1.058802604675293, "logits/rejected": 1.0234546661376953, "logps/chosen": -2.0311050415039062, "logps/rejected": -4.76409387588501, "loss": 0.5912, "nll_loss": 0.537970781326294, "rewards/accuracies": 0.875, "rewards/chosen": -0.2031104862689972, "rewards/margins": 0.2732989192008972, "rewards/rejected": -0.476409375667572, "step": 6628 }, { "epoch": 18.1492128678987, "grad_norm": 5.675043106079102, "learning_rate": 9.191780821917808e-08, "log_odds_chosen": 2.0514516830444336, "log_odds_ratio": -0.15581706166267395, "logits/chosen": 1.2878291606903076, "logits/rejected": 1.2772873640060425, "logps/chosen": -2.0470714569091797, "logps/rejected": -3.951749801635742, "loss": 0.584, "nll_loss": 0.5683942437171936, "rewards/accuracies": 1.0, "rewards/chosen": -0.20470714569091797, "rewards/margins": 0.19046784937381744, "rewards/rejected": -0.3951749801635742, "step": 6629 }, { "epoch": 18.151950718685832, "grad_norm": 4.277611255645752, "learning_rate": 9.178082191780821e-08, "log_odds_chosen": 3.2482388019561768, "log_odds_ratio": -0.22391226887702942, "logits/chosen": 1.3693163394927979, "logits/rejected": 1.4130783081054688, "logps/chosen": -1.7151869535446167, "logps/rejected": -4.780392646789551, "loss": 0.4986, "nll_loss": 0.4762267768383026, "rewards/accuracies": 1.0, "rewards/chosen": -0.1715187132358551, "rewards/margins": 0.3065205216407776, "rewards/rejected": -0.4780392348766327, "step": 6630 }, { "epoch": 18.154688569472963, "grad_norm": 4.001708507537842, "learning_rate": 9.164383561643835e-08, "log_odds_chosen": 2.2757725715637207, "log_odds_ratio": -0.2722524404525757, "logits/chosen": 0.9938722848892212, "logits/rejected": 0.9051498174667358, "logps/chosen": -1.6755859851837158, "logps/rejected": -3.6500704288482666, "loss": 0.6622, "nll_loss": 0.6349368095397949, "rewards/accuracies": 1.0, "rewards/chosen": -0.16755859553813934, "rewards/margins": 0.1974484920501709, "rewards/rejected": -0.36500710248947144, "step": 6631 }, { "epoch": 18.157426420260094, "grad_norm": 6.524533748626709, "learning_rate": 9.150684931506848e-08, "log_odds_chosen": 1.775574803352356, "log_odds_ratio": -0.9274653196334839, "logits/chosen": 1.0016049146652222, "logits/rejected": 0.9774714708328247, "logps/chosen": -2.419893741607666, "logps/rejected": -3.989262104034424, "loss": 0.6026, "nll_loss": 0.509882390499115, "rewards/accuracies": 0.75, "rewards/chosen": -0.2419893741607666, "rewards/margins": 0.1569368690252304, "rewards/rejected": -0.3989262282848358, "step": 6632 }, { "epoch": 18.16016427104723, "grad_norm": 3.980043411254883, "learning_rate": 9.136986301369864e-08, "log_odds_chosen": 3.5701229572296143, "log_odds_ratio": -0.17311696708202362, "logits/chosen": 1.044929027557373, "logits/rejected": 1.0447789430618286, "logps/chosen": -2.550534248352051, "logps/rejected": -5.987936973571777, "loss": 0.5917, "nll_loss": 0.5743973851203918, "rewards/accuracies": 1.0, "rewards/chosen": -0.25505343079566956, "rewards/margins": 0.3437403440475464, "rewards/rejected": -0.5987937450408936, "step": 6633 }, { "epoch": 18.16290212183436, "grad_norm": 3.5806665420532227, "learning_rate": 9.123287671232876e-08, "log_odds_chosen": 1.0799731016159058, "log_odds_ratio": -0.36899691820144653, "logits/chosen": 1.4494640827178955, "logits/rejected": 1.3565890789031982, "logps/chosen": -1.3785516023635864, "logps/rejected": -2.2518036365509033, "loss": 0.3888, "nll_loss": 0.3519481122493744, "rewards/accuracies": 0.875, "rewards/chosen": -0.1378551572561264, "rewards/margins": 0.08732521533966064, "rewards/rejected": -0.22518038749694824, "step": 6634 }, { "epoch": 18.16563997262149, "grad_norm": 6.246885299682617, "learning_rate": 9.10958904109589e-08, "log_odds_chosen": 1.8152529001235962, "log_odds_ratio": -0.1907723993062973, "logits/chosen": 1.2204169034957886, "logits/rejected": 1.113685965538025, "logps/chosen": -1.6890623569488525, "logps/rejected": -3.298971652984619, "loss": 0.5035, "nll_loss": 0.4844200611114502, "rewards/accuracies": 1.0, "rewards/chosen": -0.16890622675418854, "rewards/margins": 0.16099095344543457, "rewards/rejected": -0.3298971652984619, "step": 6635 }, { "epoch": 18.168377823408623, "grad_norm": 5.45095157623291, "learning_rate": 9.095890410958904e-08, "log_odds_chosen": 1.993837833404541, "log_odds_ratio": -0.4695494771003723, "logits/chosen": 0.8446676731109619, "logits/rejected": 0.7903630137443542, "logps/chosen": -1.5032798051834106, "logps/rejected": -3.1743221282958984, "loss": 0.6018, "nll_loss": 0.5548542737960815, "rewards/accuracies": 0.625, "rewards/chosen": -0.15032798051834106, "rewards/margins": 0.16710421442985535, "rewards/rejected": -0.3174322247505188, "step": 6636 }, { "epoch": 18.171115674195757, "grad_norm": 3.904284715652466, "learning_rate": 9.082191780821917e-08, "log_odds_chosen": 3.3863630294799805, "log_odds_ratio": -0.17600604891777039, "logits/chosen": 1.3555901050567627, "logits/rejected": 1.41116201877594, "logps/chosen": -2.0130581855773926, "logps/rejected": -5.200718879699707, "loss": 0.53, "nll_loss": 0.5124160051345825, "rewards/accuracies": 1.0, "rewards/chosen": -0.2013058364391327, "rewards/margins": 0.3187660574913025, "rewards/rejected": -0.5200718641281128, "step": 6637 }, { "epoch": 18.17385352498289, "grad_norm": 3.9549484252929688, "learning_rate": 9.06849315068493e-08, "log_odds_chosen": 3.7259316444396973, "log_odds_ratio": -0.1385532170534134, "logits/chosen": 0.6696140766143799, "logits/rejected": 0.6331390142440796, "logps/chosen": -1.6590080261230469, "logps/rejected": -5.146754264831543, "loss": 0.4798, "nll_loss": 0.4659130573272705, "rewards/accuracies": 1.0, "rewards/chosen": -0.1659008115530014, "rewards/margins": 0.34877461194992065, "rewards/rejected": -0.5146754384040833, "step": 6638 }, { "epoch": 18.17659137577002, "grad_norm": 3.982435703277588, "learning_rate": 9.054794520547946e-08, "log_odds_chosen": 2.451350450515747, "log_odds_ratio": -0.2254260927438736, "logits/chosen": 1.213310718536377, "logits/rejected": 1.1352124214172363, "logps/chosen": -1.8403964042663574, "logps/rejected": -4.095892906188965, "loss": 0.5396, "nll_loss": 0.5170654058456421, "rewards/accuracies": 1.0, "rewards/chosen": -0.1840396523475647, "rewards/margins": 0.22554968297481537, "rewards/rejected": -0.40958932042121887, "step": 6639 }, { "epoch": 18.17932922655715, "grad_norm": 3.6937036514282227, "learning_rate": 9.041095890410958e-08, "log_odds_chosen": 2.4835753440856934, "log_odds_ratio": -0.16719883680343628, "logits/chosen": 1.272480845451355, "logits/rejected": 1.2476518154144287, "logps/chosen": -1.647362470626831, "logps/rejected": -3.9317078590393066, "loss": 0.431, "nll_loss": 0.41423770785331726, "rewards/accuracies": 1.0, "rewards/chosen": -0.16473627090454102, "rewards/margins": 0.2284345030784607, "rewards/rejected": -0.3931707739830017, "step": 6640 }, { "epoch": 18.182067077344286, "grad_norm": 3.7914791107177734, "learning_rate": 9.027397260273972e-08, "log_odds_chosen": 2.3522982597351074, "log_odds_ratio": -0.13353776931762695, "logits/chosen": 0.9453077912330627, "logits/rejected": 0.9344748258590698, "logps/chosen": -2.0302107334136963, "logps/rejected": -4.2020263671875, "loss": 0.4375, "nll_loss": 0.42410650849342346, "rewards/accuracies": 1.0, "rewards/chosen": -0.2030210644006729, "rewards/margins": 0.21718157827854156, "rewards/rejected": -0.4202026426792145, "step": 6641 }, { "epoch": 18.184804928131417, "grad_norm": 3.701753854751587, "learning_rate": 9.013698630136986e-08, "log_odds_chosen": 2.2686166763305664, "log_odds_ratio": -0.18521448969841003, "logits/chosen": 1.1258409023284912, "logits/rejected": 1.0086089372634888, "logps/chosen": -1.2370104789733887, "logps/rejected": -3.2174625396728516, "loss": 0.4365, "nll_loss": 0.41793373227119446, "rewards/accuracies": 1.0, "rewards/chosen": -0.1237010583281517, "rewards/margins": 0.19804520905017853, "rewards/rejected": -0.321746289730072, "step": 6642 }, { "epoch": 18.187542778918548, "grad_norm": 6.505953788757324, "learning_rate": 9e-08, "log_odds_chosen": 0.7302302122116089, "log_odds_ratio": -0.4225139021873474, "logits/chosen": 1.3737472295761108, "logits/rejected": 1.3567627668380737, "logps/chosen": -1.763543725013733, "logps/rejected": -2.3589253425598145, "loss": 0.5546, "nll_loss": 0.5123069286346436, "rewards/accuracies": 0.875, "rewards/chosen": -0.17635439336299896, "rewards/margins": 0.05953817069530487, "rewards/rejected": -0.23589254915714264, "step": 6643 }, { "epoch": 18.190280629705683, "grad_norm": 3.405791997909546, "learning_rate": 8.986301369863012e-08, "log_odds_chosen": 3.888640880584717, "log_odds_ratio": -0.08045759052038193, "logits/chosen": 1.2828902006149292, "logits/rejected": 1.2580281496047974, "logps/chosen": -1.8048666715621948, "logps/rejected": -5.40537166595459, "loss": 0.5096, "nll_loss": 0.501507580280304, "rewards/accuracies": 1.0, "rewards/chosen": -0.18048666417598724, "rewards/margins": 0.3600505590438843, "rewards/rejected": -0.5405372381210327, "step": 6644 }, { "epoch": 18.193018480492814, "grad_norm": 7.459743976593018, "learning_rate": 8.972602739726028e-08, "log_odds_chosen": 2.6679885387420654, "log_odds_ratio": -0.5150603652000427, "logits/chosen": 1.0972944498062134, "logits/rejected": 1.0996171236038208, "logps/chosen": -2.6108622550964355, "logps/rejected": -5.082531452178955, "loss": 0.5648, "nll_loss": 0.5132935643196106, "rewards/accuracies": 0.75, "rewards/chosen": -0.26108622550964355, "rewards/margins": 0.24716690182685852, "rewards/rejected": -0.5082530975341797, "step": 6645 }, { "epoch": 18.195756331279945, "grad_norm": 4.990531921386719, "learning_rate": 8.958904109589042e-08, "log_odds_chosen": 2.884934902191162, "log_odds_ratio": -0.1962774246931076, "logits/chosen": 1.1275660991668701, "logits/rejected": 1.1003108024597168, "logps/chosen": -1.6474382877349854, "logps/rejected": -4.324141502380371, "loss": 0.5895, "nll_loss": 0.5698857307434082, "rewards/accuracies": 0.875, "rewards/chosen": -0.1647438406944275, "rewards/margins": 0.26767030358314514, "rewards/rejected": -0.43241414427757263, "step": 6646 }, { "epoch": 18.198494182067076, "grad_norm": 4.048061370849609, "learning_rate": 8.945205479452054e-08, "log_odds_chosen": 2.1585469245910645, "log_odds_ratio": -0.342111200094223, "logits/chosen": 0.9428882598876953, "logits/rejected": 0.9495907425880432, "logps/chosen": -1.7411372661590576, "logps/rejected": -3.753079414367676, "loss": 0.4916, "nll_loss": 0.4573787450790405, "rewards/accuracies": 0.875, "rewards/chosen": -0.17411373555660248, "rewards/margins": 0.20119421184062958, "rewards/rejected": -0.37530794739723206, "step": 6647 }, { "epoch": 18.20123203285421, "grad_norm": 4.752103328704834, "learning_rate": 8.931506849315068e-08, "log_odds_chosen": 1.8971136808395386, "log_odds_ratio": -0.24591180682182312, "logits/chosen": 1.0942870378494263, "logits/rejected": 1.1492979526519775, "logps/chosen": -2.6887378692626953, "logps/rejected": -4.487176895141602, "loss": 0.5834, "nll_loss": 0.5588395595550537, "rewards/accuracies": 0.875, "rewards/chosen": -0.26887378096580505, "rewards/margins": 0.179843932390213, "rewards/rejected": -0.44871771335601807, "step": 6648 }, { "epoch": 18.203969883641342, "grad_norm": 7.20947265625, "learning_rate": 8.917808219178082e-08, "log_odds_chosen": 5.733906269073486, "log_odds_ratio": -0.07073388993740082, "logits/chosen": 1.2418994903564453, "logits/rejected": 1.291709303855896, "logps/chosen": -2.4697604179382324, "logps/rejected": -8.077515602111816, "loss": 0.701, "nll_loss": 0.6939228773117065, "rewards/accuracies": 1.0, "rewards/chosen": -0.24697604775428772, "rewards/margins": 0.5607755184173584, "rewards/rejected": -0.8077515363693237, "step": 6649 }, { "epoch": 18.206707734428473, "grad_norm": 4.665218830108643, "learning_rate": 8.904109589041094e-08, "log_odds_chosen": 2.0881261825561523, "log_odds_ratio": -0.1484522521495819, "logits/chosen": 0.814888596534729, "logits/rejected": 0.7313711643218994, "logps/chosen": -1.8986334800720215, "logps/rejected": -3.7764079570770264, "loss": 0.5539, "nll_loss": 0.5390344858169556, "rewards/accuracies": 1.0, "rewards/chosen": -0.18986335396766663, "rewards/margins": 0.18777744472026825, "rewards/rejected": -0.37764081358909607, "step": 6650 }, { "epoch": 18.209445585215605, "grad_norm": 3.7154574394226074, "learning_rate": 8.89041095890411e-08, "log_odds_chosen": 3.1367735862731934, "log_odds_ratio": -0.2580571472644806, "logits/chosen": 0.9389634728431702, "logits/rejected": 0.9892017841339111, "logps/chosen": -2.0079164505004883, "logps/rejected": -4.985128879547119, "loss": 0.4724, "nll_loss": 0.44659295678138733, "rewards/accuracies": 1.0, "rewards/chosen": -0.20079165697097778, "rewards/margins": 0.297721266746521, "rewards/rejected": -0.4985129237174988, "step": 6651 }, { "epoch": 18.21218343600274, "grad_norm": 3.375619649887085, "learning_rate": 8.876712328767123e-08, "log_odds_chosen": 3.515636920928955, "log_odds_ratio": -0.10874618589878082, "logits/chosen": 1.1299574375152588, "logits/rejected": 1.1466120481491089, "logps/chosen": -1.9412221908569336, "logps/rejected": -5.128578186035156, "loss": 0.5193, "nll_loss": 0.5084595084190369, "rewards/accuracies": 1.0, "rewards/chosen": -0.19412222504615784, "rewards/margins": 0.3187355697154999, "rewards/rejected": -0.5128577947616577, "step": 6652 }, { "epoch": 18.21492128678987, "grad_norm": 3.8203694820404053, "learning_rate": 8.863013698630137e-08, "log_odds_chosen": 3.6578309535980225, "log_odds_ratio": -0.14229105412960052, "logits/chosen": 0.9618589282035828, "logits/rejected": 0.9389894604682922, "logps/chosen": -1.9823191165924072, "logps/rejected": -5.360470294952393, "loss": 0.5318, "nll_loss": 0.5175698399543762, "rewards/accuracies": 1.0, "rewards/chosen": -0.19823192059993744, "rewards/margins": 0.3378151059150696, "rewards/rejected": -0.5360470414161682, "step": 6653 }, { "epoch": 18.217659137577, "grad_norm": 4.086785316467285, "learning_rate": 8.84931506849315e-08, "log_odds_chosen": 3.004894971847534, "log_odds_ratio": -0.09363102912902832, "logits/chosen": 1.1782492399215698, "logits/rejected": 1.2091701030731201, "logps/chosen": -1.9813579320907593, "logps/rejected": -4.831601142883301, "loss": 0.6297, "nll_loss": 0.6202892065048218, "rewards/accuracies": 1.0, "rewards/chosen": -0.19813579320907593, "rewards/margins": 0.28502434492111206, "rewards/rejected": -0.4831601083278656, "step": 6654 }, { "epoch": 18.220396988364133, "grad_norm": 9.330672264099121, "learning_rate": 8.835616438356164e-08, "log_odds_chosen": 1.8061765432357788, "log_odds_ratio": -0.4579620361328125, "logits/chosen": 1.2292240858078003, "logits/rejected": 1.1846545934677124, "logps/chosen": -2.603895902633667, "logps/rejected": -4.279277801513672, "loss": 0.6965, "nll_loss": 0.6506681442260742, "rewards/accuracies": 0.75, "rewards/chosen": -0.26038962602615356, "rewards/margins": 0.16753822565078735, "rewards/rejected": -0.42792782187461853, "step": 6655 }, { "epoch": 18.223134839151268, "grad_norm": 9.86345386505127, "learning_rate": 8.821917808219178e-08, "log_odds_chosen": 2.5629518032073975, "log_odds_ratio": -0.7881850004196167, "logits/chosen": 0.9827011227607727, "logits/rejected": 0.9564436078071594, "logps/chosen": -2.3684725761413574, "logps/rejected": -4.7391676902771, "loss": 0.5974, "nll_loss": 0.5185937881469727, "rewards/accuracies": 0.75, "rewards/chosen": -0.23684726655483246, "rewards/margins": 0.2370694875717163, "rewards/rejected": -0.47391676902770996, "step": 6656 }, { "epoch": 18.2258726899384, "grad_norm": 6.6765828132629395, "learning_rate": 8.80821917808219e-08, "log_odds_chosen": 1.82901930809021, "log_odds_ratio": -0.48916274309158325, "logits/chosen": 1.228966236114502, "logits/rejected": 1.2600932121276855, "logps/chosen": -1.8984668254852295, "logps/rejected": -3.7029247283935547, "loss": 0.6344, "nll_loss": 0.5855193138122559, "rewards/accuracies": 0.75, "rewards/chosen": -0.1898466795682907, "rewards/margins": 0.18044579029083252, "rewards/rejected": -0.3702924847602844, "step": 6657 }, { "epoch": 18.22861054072553, "grad_norm": 3.2660281658172607, "learning_rate": 8.794520547945205e-08, "log_odds_chosen": 3.9540185928344727, "log_odds_ratio": -0.08538086712360382, "logits/chosen": 1.19252610206604, "logits/rejected": 1.2522320747375488, "logps/chosen": -1.8438124656677246, "logps/rejected": -5.606551170349121, "loss": 0.4831, "nll_loss": 0.4745901823043823, "rewards/accuracies": 1.0, "rewards/chosen": -0.18438124656677246, "rewards/margins": 0.37627387046813965, "rewards/rejected": -0.5606551170349121, "step": 6658 }, { "epoch": 18.23134839151266, "grad_norm": 5.143899440765381, "learning_rate": 8.78082191780822e-08, "log_odds_chosen": 1.7876754999160767, "log_odds_ratio": -0.18352362513542175, "logits/chosen": 1.1426420211791992, "logits/rejected": 1.120102047920227, "logps/chosen": -2.0281097888946533, "logps/rejected": -3.63301420211792, "loss": 0.528, "nll_loss": 0.5096238851547241, "rewards/accuracies": 1.0, "rewards/chosen": -0.20281098783016205, "rewards/margins": 0.16049045324325562, "rewards/rejected": -0.36330142617225647, "step": 6659 }, { "epoch": 18.234086242299796, "grad_norm": 3.7634451389312744, "learning_rate": 8.767123287671232e-08, "log_odds_chosen": 4.0908203125, "log_odds_ratio": -0.07859034091234207, "logits/chosen": 1.1963701248168945, "logits/rejected": 1.2174100875854492, "logps/chosen": -1.874737024307251, "logps/rejected": -5.748384475708008, "loss": 0.4915, "nll_loss": 0.48367002606391907, "rewards/accuracies": 1.0, "rewards/chosen": -0.18747368454933167, "rewards/margins": 0.3873647451400757, "rewards/rejected": -0.5748384594917297, "step": 6660 }, { "epoch": 18.236824093086927, "grad_norm": 3.642899513244629, "learning_rate": 8.753424657534246e-08, "log_odds_chosen": 4.671297073364258, "log_odds_ratio": -0.1367226541042328, "logits/chosen": 1.216750144958496, "logits/rejected": 1.2434653043746948, "logps/chosen": -2.5046226978302, "logps/rejected": -7.080835819244385, "loss": 0.7053, "nll_loss": 0.6916240453720093, "rewards/accuracies": 1.0, "rewards/chosen": -0.25046226382255554, "rewards/margins": 0.45762133598327637, "rewards/rejected": -0.7080835700035095, "step": 6661 }, { "epoch": 18.239561943874058, "grad_norm": 4.965995788574219, "learning_rate": 8.73972602739726e-08, "log_odds_chosen": 2.276928424835205, "log_odds_ratio": -0.355549156665802, "logits/chosen": 0.866877555847168, "logits/rejected": 0.8972045183181763, "logps/chosen": -1.7168759107589722, "logps/rejected": -3.6890406608581543, "loss": 0.6303, "nll_loss": 0.5947026014328003, "rewards/accuracies": 0.625, "rewards/chosen": -0.17168758809566498, "rewards/margins": 0.1972164660692215, "rewards/rejected": -0.3689040541648865, "step": 6662 }, { "epoch": 18.24229979466119, "grad_norm": 4.220863342285156, "learning_rate": 8.726027397260274e-08, "log_odds_chosen": 3.9955639839172363, "log_odds_ratio": -0.09477740526199341, "logits/chosen": 0.9345722198486328, "logits/rejected": 1.0681904554367065, "logps/chosen": -1.7826069593429565, "logps/rejected": -5.605752468109131, "loss": 0.8006, "nll_loss": 0.7911518812179565, "rewards/accuracies": 1.0, "rewards/chosen": -0.1782606989145279, "rewards/margins": 0.3823145031929016, "rewards/rejected": -0.5605751872062683, "step": 6663 }, { "epoch": 18.245037645448324, "grad_norm": 9.552872657775879, "learning_rate": 8.712328767123287e-08, "log_odds_chosen": 2.8089146614074707, "log_odds_ratio": -0.19333304464817047, "logits/chosen": 0.8662161231040955, "logits/rejected": 0.8656207919120789, "logps/chosen": -1.956823468208313, "logps/rejected": -4.59068489074707, "loss": 0.6609, "nll_loss": 0.6416140794754028, "rewards/accuracies": 1.0, "rewards/chosen": -0.1956823468208313, "rewards/margins": 0.26338618993759155, "rewards/rejected": -0.4590684771537781, "step": 6664 }, { "epoch": 18.247775496235455, "grad_norm": 3.2262158393859863, "learning_rate": 8.698630136986301e-08, "log_odds_chosen": 2.8757729530334473, "log_odds_ratio": -0.1284419149160385, "logits/chosen": 1.169006586074829, "logits/rejected": 1.1792861223220825, "logps/chosen": -1.6305451393127441, "logps/rejected": -4.293559551239014, "loss": 0.4528, "nll_loss": 0.439944863319397, "rewards/accuracies": 1.0, "rewards/chosen": -0.16305452585220337, "rewards/margins": 0.2663014531135559, "rewards/rejected": -0.4293559789657593, "step": 6665 }, { "epoch": 18.250513347022586, "grad_norm": 3.4714889526367188, "learning_rate": 8.684931506849315e-08, "log_odds_chosen": 2.440200090408325, "log_odds_ratio": -0.15320934355258942, "logits/chosen": 0.8763296604156494, "logits/rejected": 0.828921914100647, "logps/chosen": -1.8892894983291626, "logps/rejected": -4.174559593200684, "loss": 0.4683, "nll_loss": 0.4529435634613037, "rewards/accuracies": 1.0, "rewards/chosen": -0.18892896175384521, "rewards/margins": 0.2285270392894745, "rewards/rejected": -0.4174559712409973, "step": 6666 }, { "epoch": 18.253251197809718, "grad_norm": 3.6443417072296143, "learning_rate": 8.671232876712328e-08, "log_odds_chosen": 1.7753775119781494, "log_odds_ratio": -0.23399308323860168, "logits/chosen": 1.073183536529541, "logits/rejected": 1.034680962562561, "logps/chosen": -1.5295498371124268, "logps/rejected": -3.1077303886413574, "loss": 0.3994, "nll_loss": 0.37595561146736145, "rewards/accuracies": 1.0, "rewards/chosen": -0.15295499563217163, "rewards/margins": 0.15781807899475098, "rewards/rejected": -0.3107730746269226, "step": 6667 }, { "epoch": 18.255989048596852, "grad_norm": 4.266511917114258, "learning_rate": 8.657534246575342e-08, "log_odds_chosen": 4.28217077255249, "log_odds_ratio": -0.11185799539089203, "logits/chosen": 0.9603373408317566, "logits/rejected": 0.9337493181228638, "logps/chosen": -2.1767520904541016, "logps/rejected": -6.161718368530273, "loss": 0.6032, "nll_loss": 0.592004656791687, "rewards/accuracies": 1.0, "rewards/chosen": -0.21767520904541016, "rewards/margins": 0.3984966576099396, "rewards/rejected": -0.6161718368530273, "step": 6668 }, { "epoch": 18.258726899383984, "grad_norm": 7.6179351806640625, "learning_rate": 8.643835616438356e-08, "log_odds_chosen": 4.039348125457764, "log_odds_ratio": -0.3896128237247467, "logits/chosen": 1.0850313901901245, "logits/rejected": 1.1174207925796509, "logps/chosen": -2.0750112533569336, "logps/rejected": -5.912972450256348, "loss": 0.5274, "nll_loss": 0.48839667439460754, "rewards/accuracies": 0.875, "rewards/chosen": -0.2075011432170868, "rewards/margins": 0.3837961256504059, "rewards/rejected": -0.5912972688674927, "step": 6669 }, { "epoch": 18.261464750171115, "grad_norm": 3.5789694786071777, "learning_rate": 8.630136986301371e-08, "log_odds_chosen": 1.568458080291748, "log_odds_ratio": -0.22291134297847748, "logits/chosen": 1.1089893579483032, "logits/rejected": 0.9487820863723755, "logps/chosen": -1.5276150703430176, "logps/rejected": -2.8666610717773438, "loss": 0.4446, "nll_loss": 0.42235004901885986, "rewards/accuracies": 1.0, "rewards/chosen": -0.1527615189552307, "rewards/margins": 0.1339045912027359, "rewards/rejected": -0.2866661250591278, "step": 6670 }, { "epoch": 18.26420260095825, "grad_norm": 3.2926273345947266, "learning_rate": 8.616438356164383e-08, "log_odds_chosen": 1.755001187324524, "log_odds_ratio": -0.2845591902732849, "logits/chosen": 1.4640464782714844, "logits/rejected": 1.3890035152435303, "logps/chosen": -1.2226932048797607, "logps/rejected": -2.768134355545044, "loss": 0.4132, "nll_loss": 0.3847671151161194, "rewards/accuracies": 0.875, "rewards/chosen": -0.12226931005716324, "rewards/margins": 0.15454412996768951, "rewards/rejected": -0.27681344747543335, "step": 6671 }, { "epoch": 18.26694045174538, "grad_norm": 4.107486248016357, "learning_rate": 8.602739726027397e-08, "log_odds_chosen": 3.208768844604492, "log_odds_ratio": -0.17182986438274384, "logits/chosen": 0.9181480407714844, "logits/rejected": 0.879017174243927, "logps/chosen": -2.1342344284057617, "logps/rejected": -5.199291229248047, "loss": 0.6755, "nll_loss": 0.6582890152931213, "rewards/accuracies": 0.875, "rewards/chosen": -0.2134234458208084, "rewards/margins": 0.3065057396888733, "rewards/rejected": -0.5199291706085205, "step": 6672 }, { "epoch": 18.269678302532512, "grad_norm": 3.5577855110168457, "learning_rate": 8.589041095890411e-08, "log_odds_chosen": 2.6110057830810547, "log_odds_ratio": -0.13219115138053894, "logits/chosen": 1.3249355554580688, "logits/rejected": 1.3343889713287354, "logps/chosen": -1.724048137664795, "logps/rejected": -4.1695051193237305, "loss": 0.4728, "nll_loss": 0.45960426330566406, "rewards/accuracies": 1.0, "rewards/chosen": -0.17240481078624725, "rewards/margins": 0.24454572796821594, "rewards/rejected": -0.416950523853302, "step": 6673 }, { "epoch": 18.272416153319643, "grad_norm": 3.43696928024292, "learning_rate": 8.575342465753424e-08, "log_odds_chosen": 2.6842103004455566, "log_odds_ratio": -0.22098249197006226, "logits/chosen": 1.1990320682525635, "logits/rejected": 1.2218514680862427, "logps/chosen": -2.230020046234131, "logps/rejected": -4.738062381744385, "loss": 0.5396, "nll_loss": 0.5174608826637268, "rewards/accuracies": 1.0, "rewards/chosen": -0.22300201654434204, "rewards/margins": 0.25080418586730957, "rewards/rejected": -0.4738062620162964, "step": 6674 }, { "epoch": 18.275154004106778, "grad_norm": 4.207827091217041, "learning_rate": 8.561643835616438e-08, "log_odds_chosen": 2.632449150085449, "log_odds_ratio": -0.16030025482177734, "logits/chosen": 1.2000523805618286, "logits/rejected": 1.2520273923873901, "logps/chosen": -2.2321107387542725, "logps/rejected": -4.750351905822754, "loss": 0.597, "nll_loss": 0.5809573531150818, "rewards/accuracies": 1.0, "rewards/chosen": -0.22321107983589172, "rewards/margins": 0.25182414054870605, "rewards/rejected": -0.4750352203845978, "step": 6675 }, { "epoch": 18.27789185489391, "grad_norm": 4.876904487609863, "learning_rate": 8.547945205479453e-08, "log_odds_chosen": 3.677102565765381, "log_odds_ratio": -0.26382362842559814, "logits/chosen": 0.9224876165390015, "logits/rejected": 0.948662281036377, "logps/chosen": -2.0180983543395996, "logps/rejected": -5.487950801849365, "loss": 0.6133, "nll_loss": 0.5868685841560364, "rewards/accuracies": 0.875, "rewards/chosen": -0.2018098384141922, "rewards/margins": 0.34698522090911865, "rewards/rejected": -0.5487951040267944, "step": 6676 }, { "epoch": 18.28062970568104, "grad_norm": 3.7213146686553955, "learning_rate": 8.534246575342465e-08, "log_odds_chosen": 3.6938695907592773, "log_odds_ratio": -0.21146179735660553, "logits/chosen": 1.0962419509887695, "logits/rejected": 1.0445754528045654, "logps/chosen": -1.4107476472854614, "logps/rejected": -4.871351718902588, "loss": 0.5078, "nll_loss": 0.48665347695350647, "rewards/accuracies": 0.875, "rewards/chosen": -0.1410747766494751, "rewards/margins": 0.3460604250431061, "rewards/rejected": -0.48713523149490356, "step": 6677 }, { "epoch": 18.28336755646817, "grad_norm": 3.61653208732605, "learning_rate": 8.520547945205479e-08, "log_odds_chosen": 2.197624683380127, "log_odds_ratio": -0.2952112555503845, "logits/chosen": 1.1692932844161987, "logits/rejected": 1.114503264427185, "logps/chosen": -1.6246474981307983, "logps/rejected": -3.6257615089416504, "loss": 0.4659, "nll_loss": 0.436358779668808, "rewards/accuracies": 0.875, "rewards/chosen": -0.16246475279331207, "rewards/margins": 0.20011141896247864, "rewards/rejected": -0.3625761866569519, "step": 6678 }, { "epoch": 18.286105407255306, "grad_norm": 4.7210588455200195, "learning_rate": 8.506849315068493e-08, "log_odds_chosen": 1.726935625076294, "log_odds_ratio": -0.5261061787605286, "logits/chosen": 1.1099621057510376, "logits/rejected": 1.1106786727905273, "logps/chosen": -1.9166083335876465, "logps/rejected": -3.5184168815612793, "loss": 0.4982, "nll_loss": 0.4456283450126648, "rewards/accuracies": 0.75, "rewards/chosen": -0.1916608363389969, "rewards/margins": 0.16018083691596985, "rewards/rejected": -0.35184168815612793, "step": 6679 }, { "epoch": 18.288843258042437, "grad_norm": 7.655694007873535, "learning_rate": 8.493150684931506e-08, "log_odds_chosen": 1.8323858976364136, "log_odds_ratio": -0.1911255419254303, "logits/chosen": 1.2881293296813965, "logits/rejected": 1.289190411567688, "logps/chosen": -1.9958598613739014, "logps/rejected": -3.6572630405426025, "loss": 0.5473, "nll_loss": 0.5281552076339722, "rewards/accuracies": 1.0, "rewards/chosen": -0.19958598911762238, "rewards/margins": 0.1661403328180313, "rewards/rejected": -0.3657263219356537, "step": 6680 }, { "epoch": 18.29158110882957, "grad_norm": 3.9771759510040283, "learning_rate": 8.47945205479452e-08, "log_odds_chosen": 2.999154806137085, "log_odds_ratio": -0.1108412891626358, "logits/chosen": 1.1962891817092896, "logits/rejected": 1.1984368562698364, "logps/chosen": -1.775942087173462, "logps/rejected": -4.527791500091553, "loss": 0.476, "nll_loss": 0.464959979057312, "rewards/accuracies": 1.0, "rewards/chosen": -0.17759422957897186, "rewards/margins": 0.2751849293708801, "rewards/rejected": -0.4527791738510132, "step": 6681 }, { "epoch": 18.2943189596167, "grad_norm": 3.599586009979248, "learning_rate": 8.465753424657533e-08, "log_odds_chosen": 3.3803844451904297, "log_odds_ratio": -0.16870680451393127, "logits/chosen": 1.0590720176696777, "logits/rejected": 1.0027612447738647, "logps/chosen": -1.483726978302002, "logps/rejected": -4.634086608886719, "loss": 0.5476, "nll_loss": 0.530692994594574, "rewards/accuracies": 1.0, "rewards/chosen": -0.14837270975112915, "rewards/margins": 0.31503596901893616, "rewards/rejected": -0.4634086489677429, "step": 6682 }, { "epoch": 18.297056810403834, "grad_norm": 3.972949743270874, "learning_rate": 8.452054794520549e-08, "log_odds_chosen": 3.862271308898926, "log_odds_ratio": -0.2627445161342621, "logits/chosen": 0.9674448370933533, "logits/rejected": 0.9195493459701538, "logps/chosen": -1.5169885158538818, "logps/rejected": -5.149251461029053, "loss": 0.4737, "nll_loss": 0.4473917484283447, "rewards/accuracies": 0.75, "rewards/chosen": -0.15169885754585266, "rewards/margins": 0.3632262945175171, "rewards/rejected": -0.5149251222610474, "step": 6683 }, { "epoch": 18.299794661190965, "grad_norm": 3.313849925994873, "learning_rate": 8.438356164383561e-08, "log_odds_chosen": 2.6277499198913574, "log_odds_ratio": -0.19905780255794525, "logits/chosen": 1.1368013620376587, "logits/rejected": 1.1349244117736816, "logps/chosen": -1.5693085193634033, "logps/rejected": -3.846949338912964, "loss": 0.442, "nll_loss": 0.4221283793449402, "rewards/accuracies": 0.875, "rewards/chosen": -0.1569308638572693, "rewards/margins": 0.2277640998363495, "rewards/rejected": -0.3846949338912964, "step": 6684 }, { "epoch": 18.302532511978097, "grad_norm": 4.835533618927002, "learning_rate": 8.424657534246575e-08, "log_odds_chosen": 2.9527180194854736, "log_odds_ratio": -0.29597800970077515, "logits/chosen": 1.1127328872680664, "logits/rejected": 1.1118407249450684, "logps/chosen": -2.241309881210327, "logps/rejected": -5.070197105407715, "loss": 0.6959, "nll_loss": 0.6662986278533936, "rewards/accuracies": 0.875, "rewards/chosen": -0.2241310179233551, "rewards/margins": 0.2828887104988098, "rewards/rejected": -0.5070197582244873, "step": 6685 }, { "epoch": 18.305270362765228, "grad_norm": 3.7240734100341797, "learning_rate": 8.410958904109589e-08, "log_odds_chosen": 3.7724413871765137, "log_odds_ratio": -0.11578920483589172, "logits/chosen": 0.9636281728744507, "logits/rejected": 1.0087788105010986, "logps/chosen": -1.6541976928710938, "logps/rejected": -5.18723201751709, "loss": 0.5428, "nll_loss": 0.5311898589134216, "rewards/accuracies": 1.0, "rewards/chosen": -0.1654197722673416, "rewards/margins": 0.353303462266922, "rewards/rejected": -0.5187232494354248, "step": 6686 }, { "epoch": 18.308008213552363, "grad_norm": 3.511213779449463, "learning_rate": 8.397260273972601e-08, "log_odds_chosen": 3.317601203918457, "log_odds_ratio": -0.1309756189584732, "logits/chosen": 0.9450370669364929, "logits/rejected": 0.951729416847229, "logps/chosen": -2.053924560546875, "logps/rejected": -5.207420349121094, "loss": 0.523, "nll_loss": 0.5099127292633057, "rewards/accuracies": 1.0, "rewards/chosen": -0.20539246499538422, "rewards/margins": 0.31534960865974426, "rewards/rejected": -0.5207421183586121, "step": 6687 }, { "epoch": 18.310746064339494, "grad_norm": 5.475627422332764, "learning_rate": 8.383561643835615e-08, "log_odds_chosen": 1.938194990158081, "log_odds_ratio": -0.2743833065032959, "logits/chosen": 1.0569077730178833, "logits/rejected": 0.9500325918197632, "logps/chosen": -2.0505995750427246, "logps/rejected": -3.8749847412109375, "loss": 0.6136, "nll_loss": 0.5861334204673767, "rewards/accuracies": 0.875, "rewards/chosen": -0.2050599455833435, "rewards/margins": 0.18243852257728577, "rewards/rejected": -0.3874984681606293, "step": 6688 }, { "epoch": 18.313483915126625, "grad_norm": 3.463744878768921, "learning_rate": 8.36986301369863e-08, "log_odds_chosen": 5.661929607391357, "log_odds_ratio": -0.0780378058552742, "logits/chosen": 1.1359397172927856, "logits/rejected": 1.0975661277770996, "logps/chosen": -1.643965244293213, "logps/rejected": -7.040990352630615, "loss": 0.6044, "nll_loss": 0.5966244339942932, "rewards/accuracies": 1.0, "rewards/chosen": -0.16439653933048248, "rewards/margins": 0.5397025346755981, "rewards/rejected": -0.7040990591049194, "step": 6689 }, { "epoch": 18.316221765913756, "grad_norm": 6.633830547332764, "learning_rate": 8.356164383561644e-08, "log_odds_chosen": 1.4130924940109253, "log_odds_ratio": -0.3445202708244324, "logits/chosen": 1.1773314476013184, "logits/rejected": 1.113586664199829, "logps/chosen": -2.032762050628662, "logps/rejected": -3.252293825149536, "loss": 0.5253, "nll_loss": 0.4908785820007324, "rewards/accuracies": 0.75, "rewards/chosen": -0.20327618718147278, "rewards/margins": 0.12195321917533875, "rewards/rejected": -0.3252294063568115, "step": 6690 }, { "epoch": 18.31895961670089, "grad_norm": 7.641125202178955, "learning_rate": 8.342465753424657e-08, "log_odds_chosen": 0.745703935623169, "log_odds_ratio": -0.5439486503601074, "logits/chosen": 0.9758836030960083, "logits/rejected": 0.9848166704177856, "logps/chosen": -2.636375665664673, "logps/rejected": -3.323298692703247, "loss": 0.7047, "nll_loss": 0.6503114104270935, "rewards/accuracies": 0.625, "rewards/chosen": -0.26363757252693176, "rewards/margins": 0.06869229674339294, "rewards/rejected": -0.3323298692703247, "step": 6691 }, { "epoch": 18.321697467488022, "grad_norm": 3.7138242721557617, "learning_rate": 8.328767123287671e-08, "log_odds_chosen": 3.392474412918091, "log_odds_ratio": -0.08530393242835999, "logits/chosen": 1.2799437046051025, "logits/rejected": 1.3457832336425781, "logps/chosen": -2.352637767791748, "logps/rejected": -5.554548740386963, "loss": 0.5383, "nll_loss": 0.5297956466674805, "rewards/accuracies": 1.0, "rewards/chosen": -0.23526379466056824, "rewards/margins": 0.3201911151409149, "rewards/rejected": -0.5554549098014832, "step": 6692 }, { "epoch": 18.324435318275153, "grad_norm": 3.6875526905059814, "learning_rate": 8.315068493150685e-08, "log_odds_chosen": 3.7315151691436768, "log_odds_ratio": -0.11907804012298584, "logits/chosen": 1.0539277791976929, "logits/rejected": 1.058823585510254, "logps/chosen": -1.8915774822235107, "logps/rejected": -5.455721855163574, "loss": 0.4798, "nll_loss": 0.46790415048599243, "rewards/accuracies": 1.0, "rewards/chosen": -0.18915775418281555, "rewards/margins": 0.35641443729400635, "rewards/rejected": -0.5455722212791443, "step": 6693 }, { "epoch": 18.327173169062284, "grad_norm": 3.5968494415283203, "learning_rate": 8.301369863013697e-08, "log_odds_chosen": 3.5800693035125732, "log_odds_ratio": -0.17729313671588898, "logits/chosen": 1.280786156654358, "logits/rejected": 1.3264696598052979, "logps/chosen": -1.6600379943847656, "logps/rejected": -5.015140533447266, "loss": 0.5087, "nll_loss": 0.4909832179546356, "rewards/accuracies": 1.0, "rewards/chosen": -0.16600379347801208, "rewards/margins": 0.33551025390625, "rewards/rejected": -0.5015140175819397, "step": 6694 }, { "epoch": 18.32991101984942, "grad_norm": 3.4963676929473877, "learning_rate": 8.287671232876713e-08, "log_odds_chosen": 2.159137487411499, "log_odds_ratio": -0.2767191231250763, "logits/chosen": 1.0205963850021362, "logits/rejected": 0.9851412773132324, "logps/chosen": -2.129288673400879, "logps/rejected": -4.1882805824279785, "loss": 0.5864, "nll_loss": 0.5586835741996765, "rewards/accuracies": 0.75, "rewards/chosen": -0.2129288613796234, "rewards/margins": 0.2058991938829422, "rewards/rejected": -0.4188280701637268, "step": 6695 }, { "epoch": 18.33264887063655, "grad_norm": 10.136518478393555, "learning_rate": 8.273972602739726e-08, "log_odds_chosen": 2.5583348274230957, "log_odds_ratio": -0.5346494317054749, "logits/chosen": 1.2441076040267944, "logits/rejected": 1.2004663944244385, "logps/chosen": -2.347076892852783, "logps/rejected": -4.711967945098877, "loss": 0.5787, "nll_loss": 0.5252607464790344, "rewards/accuracies": 0.75, "rewards/chosen": -0.23470768332481384, "rewards/margins": 0.23648910224437714, "rewards/rejected": -0.47119683027267456, "step": 6696 }, { "epoch": 18.33538672142368, "grad_norm": 3.3751449584960938, "learning_rate": 8.260273972602739e-08, "log_odds_chosen": 2.5553700923919678, "log_odds_ratio": -0.13154767453670502, "logits/chosen": 1.2196028232574463, "logits/rejected": 1.1895136833190918, "logps/chosen": -1.5684553384780884, "logps/rejected": -3.8907175064086914, "loss": 0.4192, "nll_loss": 0.4060288965702057, "rewards/accuracies": 1.0, "rewards/chosen": -0.15684553980827332, "rewards/margins": 0.23222622275352478, "rewards/rejected": -0.3890717625617981, "step": 6697 }, { "epoch": 18.338124572210816, "grad_norm": 4.14400577545166, "learning_rate": 8.246575342465753e-08, "log_odds_chosen": 1.2629268169403076, "log_odds_ratio": -0.2999795377254486, "logits/chosen": 1.4955661296844482, "logits/rejected": 1.4445685148239136, "logps/chosen": -1.4968924522399902, "logps/rejected": -2.5770061016082764, "loss": 0.4505, "nll_loss": 0.42053520679473877, "rewards/accuracies": 0.875, "rewards/chosen": -0.14968925714492798, "rewards/margins": 0.10801136493682861, "rewards/rejected": -0.2577006220817566, "step": 6698 }, { "epoch": 18.340862422997947, "grad_norm": 4.339419841766357, "learning_rate": 8.232876712328767e-08, "log_odds_chosen": 1.9035836458206177, "log_odds_ratio": -0.19992685317993164, "logits/chosen": 1.3983045816421509, "logits/rejected": 1.4145467281341553, "logps/chosen": -2.125521421432495, "logps/rejected": -3.8965892791748047, "loss": 0.5298, "nll_loss": 0.5098109245300293, "rewards/accuracies": 1.0, "rewards/chosen": -0.21255216002464294, "rewards/margins": 0.1771067976951599, "rewards/rejected": -0.38965892791748047, "step": 6699 }, { "epoch": 18.34360027378508, "grad_norm": 4.022587776184082, "learning_rate": 8.21917808219178e-08, "log_odds_chosen": 2.74609112739563, "log_odds_ratio": -0.22212837636470795, "logits/chosen": 1.0336334705352783, "logits/rejected": 0.9880164861679077, "logps/chosen": -1.4159399271011353, "logps/rejected": -3.9474689960479736, "loss": 0.4438, "nll_loss": 0.4215702712535858, "rewards/accuracies": 0.875, "rewards/chosen": -0.14159399271011353, "rewards/margins": 0.25315290689468384, "rewards/rejected": -0.39474689960479736, "step": 6700 }, { "epoch": 18.34633812457221, "grad_norm": 3.5998382568359375, "learning_rate": 8.205479452054793e-08, "log_odds_chosen": 2.444119453430176, "log_odds_ratio": -0.14645543694496155, "logits/chosen": 1.3490389585494995, "logits/rejected": 1.3292979001998901, "logps/chosen": -1.5154213905334473, "logps/rejected": -3.707301378250122, "loss": 0.3961, "nll_loss": 0.3814432621002197, "rewards/accuracies": 1.0, "rewards/chosen": -0.15154214203357697, "rewards/margins": 0.21918798983097076, "rewards/rejected": -0.37073010206222534, "step": 6701 }, { "epoch": 18.349075975359344, "grad_norm": 4.054459571838379, "learning_rate": 8.191780821917808e-08, "log_odds_chosen": 3.318241596221924, "log_odds_ratio": -0.22837091982364655, "logits/chosen": 1.0106048583984375, "logits/rejected": 1.0098603963851929, "logps/chosen": -1.8253480195999146, "logps/rejected": -5.018198013305664, "loss": 0.5961, "nll_loss": 0.5732510089874268, "rewards/accuracies": 0.875, "rewards/chosen": -0.18253479897975922, "rewards/margins": 0.3192850351333618, "rewards/rejected": -0.5018198490142822, "step": 6702 }, { "epoch": 18.351813826146476, "grad_norm": 3.6499619483947754, "learning_rate": 8.178082191780822e-08, "log_odds_chosen": 4.0603156089782715, "log_odds_ratio": -0.14720502495765686, "logits/chosen": 1.1665821075439453, "logits/rejected": 1.220931887626648, "logps/chosen": -1.8322944641113281, "logps/rejected": -5.729433059692383, "loss": 0.6026, "nll_loss": 0.5879190564155579, "rewards/accuracies": 1.0, "rewards/chosen": -0.1832294464111328, "rewards/margins": 0.3897138237953186, "rewards/rejected": -0.5729432702064514, "step": 6703 }, { "epoch": 18.354551676933607, "grad_norm": 3.599649667739868, "learning_rate": 8.164383561643835e-08, "log_odds_chosen": 3.4945051670074463, "log_odds_ratio": -0.15852048993110657, "logits/chosen": 1.1197752952575684, "logits/rejected": 1.0892581939697266, "logps/chosen": -1.9439234733581543, "logps/rejected": -5.239331245422363, "loss": 0.4952, "nll_loss": 0.4793848693370819, "rewards/accuracies": 1.0, "rewards/chosen": -0.1943923532962799, "rewards/margins": 0.32954081892967224, "rewards/rejected": -0.5239331722259521, "step": 6704 }, { "epoch": 18.357289527720738, "grad_norm": 8.279677391052246, "learning_rate": 8.150684931506849e-08, "log_odds_chosen": 4.486970901489258, "log_odds_ratio": -0.3170095682144165, "logits/chosen": 0.9409933090209961, "logits/rejected": 0.9373185634613037, "logps/chosen": -2.667492389678955, "logps/rejected": -6.996136665344238, "loss": 0.6636, "nll_loss": 0.6319352984428406, "rewards/accuracies": 0.875, "rewards/chosen": -0.26674923300743103, "rewards/margins": 0.4328644275665283, "rewards/rejected": -0.699613630771637, "step": 6705 }, { "epoch": 18.360027378507873, "grad_norm": 4.102666854858398, "learning_rate": 8.136986301369863e-08, "log_odds_chosen": 1.4364317655563354, "log_odds_ratio": -0.26410308480262756, "logits/chosen": 1.127751350402832, "logits/rejected": 1.0479540824890137, "logps/chosen": -1.8744244575500488, "logps/rejected": -3.160036563873291, "loss": 0.4374, "nll_loss": 0.4110071063041687, "rewards/accuracies": 1.0, "rewards/chosen": -0.18744245171546936, "rewards/margins": 0.12856119871139526, "rewards/rejected": -0.3160036504268646, "step": 6706 }, { "epoch": 18.362765229295004, "grad_norm": 4.658812522888184, "learning_rate": 8.123287671232875e-08, "log_odds_chosen": 4.455776214599609, "log_odds_ratio": -0.13634422421455383, "logits/chosen": 1.312205195426941, "logits/rejected": 1.4012378454208374, "logps/chosen": -2.287095546722412, "logps/rejected": -6.535240650177002, "loss": 0.5909, "nll_loss": 0.5772352814674377, "rewards/accuracies": 0.875, "rewards/chosen": -0.22870954871177673, "rewards/margins": 0.4248145520687103, "rewards/rejected": -0.6535241603851318, "step": 6707 }, { "epoch": 18.365503080082135, "grad_norm": 4.380196571350098, "learning_rate": 8.10958904109589e-08, "log_odds_chosen": 1.5514602661132812, "log_odds_ratio": -0.3072621822357178, "logits/chosen": 1.2453664541244507, "logits/rejected": 1.203482985496521, "logps/chosen": -1.8212239742279053, "logps/rejected": -3.135228395462036, "loss": 0.4526, "nll_loss": 0.4219123125076294, "rewards/accuracies": 0.875, "rewards/chosen": -0.1821223944425583, "rewards/margins": 0.1314004361629486, "rewards/rejected": -0.3135228455066681, "step": 6708 }, { "epoch": 18.368240930869266, "grad_norm": 5.098725318908691, "learning_rate": 8.095890410958904e-08, "log_odds_chosen": 2.1273398399353027, "log_odds_ratio": -0.32379186153411865, "logits/chosen": 1.1688002347946167, "logits/rejected": 1.1728956699371338, "logps/chosen": -1.8461129665374756, "logps/rejected": -3.8066463470458984, "loss": 0.5793, "nll_loss": 0.5469433069229126, "rewards/accuracies": 0.875, "rewards/chosen": -0.18461130559444427, "rewards/margins": 0.19605335593223572, "rewards/rejected": -0.3806646764278412, "step": 6709 }, { "epoch": 18.3709787816564, "grad_norm": 4.712372303009033, "learning_rate": 8.082191780821918e-08, "log_odds_chosen": 2.364732265472412, "log_odds_ratio": -0.2537664473056793, "logits/chosen": 1.3356410264968872, "logits/rejected": 1.3889456987380981, "logps/chosen": -1.4402246475219727, "logps/rejected": -3.4712092876434326, "loss": 0.4393, "nll_loss": 0.4139515161514282, "rewards/accuracies": 0.875, "rewards/chosen": -0.14402247965335846, "rewards/margins": 0.20309844613075256, "rewards/rejected": -0.34712091088294983, "step": 6710 }, { "epoch": 18.373716632443532, "grad_norm": 3.5159826278686523, "learning_rate": 8.068493150684931e-08, "log_odds_chosen": 2.8084161281585693, "log_odds_ratio": -0.28667178750038147, "logits/chosen": 1.0480940341949463, "logits/rejected": 0.970841109752655, "logps/chosen": -1.4482958316802979, "logps/rejected": -4.088183403015137, "loss": 0.4691, "nll_loss": 0.4404492974281311, "rewards/accuracies": 1.0, "rewards/chosen": -0.14482957124710083, "rewards/margins": 0.26398879289627075, "rewards/rejected": -0.4088183641433716, "step": 6711 }, { "epoch": 18.376454483230663, "grad_norm": 3.594045639038086, "learning_rate": 8.054794520547945e-08, "log_odds_chosen": 3.7693870067596436, "log_odds_ratio": -0.1590816229581833, "logits/chosen": 1.3392362594604492, "logits/rejected": 1.3279943466186523, "logps/chosen": -1.7317026853561401, "logps/rejected": -5.241769790649414, "loss": 0.4482, "nll_loss": 0.43224456906318665, "rewards/accuracies": 1.0, "rewards/chosen": -0.173170268535614, "rewards/margins": 0.35100671648979187, "rewards/rejected": -0.5241770148277283, "step": 6712 }, { "epoch": 18.379192334017795, "grad_norm": 3.1998205184936523, "learning_rate": 8.041095890410959e-08, "log_odds_chosen": 4.357628345489502, "log_odds_ratio": -0.1308901011943817, "logits/chosen": 0.8672255277633667, "logits/rejected": 0.875258207321167, "logps/chosen": -2.0632810592651367, "logps/rejected": -6.278545379638672, "loss": 0.6238, "nll_loss": 0.6106815338134766, "rewards/accuracies": 1.0, "rewards/chosen": -0.2063281387090683, "rewards/margins": 0.4215264916419983, "rewards/rejected": -0.627854585647583, "step": 6713 }, { "epoch": 18.38193018480493, "grad_norm": 3.285274028778076, "learning_rate": 8.027397260273972e-08, "log_odds_chosen": 3.1729114055633545, "log_odds_ratio": -0.214167982339859, "logits/chosen": 1.131147027015686, "logits/rejected": 1.0993425846099854, "logps/chosen": -1.6692174673080444, "logps/rejected": -4.664012908935547, "loss": 0.4201, "nll_loss": 0.39868128299713135, "rewards/accuracies": 1.0, "rewards/chosen": -0.16692174971103668, "rewards/margins": 0.29947948455810547, "rewards/rejected": -0.46640124917030334, "step": 6714 }, { "epoch": 18.38466803559206, "grad_norm": 3.966848611831665, "learning_rate": 8.013698630136986e-08, "log_odds_chosen": 1.5428639650344849, "log_odds_ratio": -0.2793324887752533, "logits/chosen": 1.2012900114059448, "logits/rejected": 1.2073791027069092, "logps/chosen": -1.8695526123046875, "logps/rejected": -3.2301645278930664, "loss": 0.4561, "nll_loss": 0.4282114505767822, "rewards/accuracies": 0.875, "rewards/chosen": -0.1869552731513977, "rewards/margins": 0.13606120645999908, "rewards/rejected": -0.3230164647102356, "step": 6715 }, { "epoch": 18.38740588637919, "grad_norm": 3.545823574066162, "learning_rate": 8e-08, "log_odds_chosen": 5.135404586791992, "log_odds_ratio": -0.1364918053150177, "logits/chosen": 0.9959203004837036, "logits/rejected": 1.0020580291748047, "logps/chosen": -2.139483690261841, "logps/rejected": -7.1346049308776855, "loss": 0.5748, "nll_loss": 0.5611777305603027, "rewards/accuracies": 1.0, "rewards/chosen": -0.21394838392734528, "rewards/margins": 0.4995121359825134, "rewards/rejected": -0.7134605050086975, "step": 6716 }, { "epoch": 18.390143737166323, "grad_norm": 4.350179195404053, "learning_rate": 7.986301369863013e-08, "log_odds_chosen": 1.768106460571289, "log_odds_ratio": -0.42615821957588196, "logits/chosen": 0.9167879223823547, "logits/rejected": 0.9942624568939209, "logps/chosen": -1.5177615880966187, "logps/rejected": -3.026123523712158, "loss": 0.5088, "nll_loss": 0.4661678671836853, "rewards/accuracies": 0.875, "rewards/chosen": -0.15177616477012634, "rewards/margins": 0.15083619952201843, "rewards/rejected": -0.3026123642921448, "step": 6717 }, { "epoch": 18.392881587953458, "grad_norm": 9.097746849060059, "learning_rate": 7.972602739726027e-08, "log_odds_chosen": 2.3924245834350586, "log_odds_ratio": -0.41005003452301025, "logits/chosen": 1.2303121089935303, "logits/rejected": 1.2322750091552734, "logps/chosen": -2.436946392059326, "logps/rejected": -4.780077934265137, "loss": 0.6951, "nll_loss": 0.6541106104850769, "rewards/accuracies": 0.75, "rewards/chosen": -0.24369463324546814, "rewards/margins": 0.23431316018104553, "rewards/rejected": -0.47800779342651367, "step": 6718 }, { "epoch": 18.39561943874059, "grad_norm": 3.3048782348632812, "learning_rate": 7.95890410958904e-08, "log_odds_chosen": 2.58905029296875, "log_odds_ratio": -0.18905918300151825, "logits/chosen": 1.1320512294769287, "logits/rejected": 1.1374229192733765, "logps/chosen": -1.7365214824676514, "logps/rejected": -4.092135906219482, "loss": 0.5264, "nll_loss": 0.5074710845947266, "rewards/accuracies": 1.0, "rewards/chosen": -0.17365214228630066, "rewards/margins": 0.23556144535541534, "rewards/rejected": -0.4092136025428772, "step": 6719 }, { "epoch": 18.39835728952772, "grad_norm": 4.109984874725342, "learning_rate": 7.945205479452056e-08, "log_odds_chosen": 2.888296604156494, "log_odds_ratio": -0.14841662347316742, "logits/chosen": 0.9515587091445923, "logits/rejected": 0.9550536870956421, "logps/chosen": -1.872504711151123, "logps/rejected": -4.603331565856934, "loss": 0.6973, "nll_loss": 0.6825079917907715, "rewards/accuracies": 1.0, "rewards/chosen": -0.18725046515464783, "rewards/margins": 0.2730826735496521, "rewards/rejected": -0.4603331685066223, "step": 6720 }, { "epoch": 18.40109514031485, "grad_norm": 3.8359365463256836, "learning_rate": 7.931506849315068e-08, "log_odds_chosen": 1.8452180624008179, "log_odds_ratio": -0.2933691442012787, "logits/chosen": 1.1191176176071167, "logits/rejected": 1.0988640785217285, "logps/chosen": -1.8897629976272583, "logps/rejected": -3.626047134399414, "loss": 0.5664, "nll_loss": 0.5370503067970276, "rewards/accuracies": 0.875, "rewards/chosen": -0.18897631764411926, "rewards/margins": 0.17362841963768005, "rewards/rejected": -0.3626047372817993, "step": 6721 }, { "epoch": 18.403832991101986, "grad_norm": 3.9312398433685303, "learning_rate": 7.917808219178082e-08, "log_odds_chosen": 1.0871938467025757, "log_odds_ratio": -0.363799512386322, "logits/chosen": 1.224064588546753, "logits/rejected": 1.1301145553588867, "logps/chosen": -2.044196128845215, "logps/rejected": -3.014042854309082, "loss": 0.5117, "nll_loss": 0.4752770662307739, "rewards/accuracies": 1.0, "rewards/chosen": -0.20441961288452148, "rewards/margins": 0.09698465466499329, "rewards/rejected": -0.30140426754951477, "step": 6722 }, { "epoch": 18.406570841889117, "grad_norm": 4.409764289855957, "learning_rate": 7.904109589041096e-08, "log_odds_chosen": 1.8196927309036255, "log_odds_ratio": -0.20067724585533142, "logits/chosen": 0.9298926591873169, "logits/rejected": 0.9584622979164124, "logps/chosen": -1.9425199031829834, "logps/rejected": -3.5503947734832764, "loss": 0.5206, "nll_loss": 0.5005282163619995, "rewards/accuracies": 1.0, "rewards/chosen": -0.19425198435783386, "rewards/margins": 0.16078749299049377, "rewards/rejected": -0.35503944754600525, "step": 6723 }, { "epoch": 18.409308692676248, "grad_norm": 4.022587299346924, "learning_rate": 7.890410958904109e-08, "log_odds_chosen": 2.3216848373413086, "log_odds_ratio": -0.15724407136440277, "logits/chosen": 1.0094258785247803, "logits/rejected": 1.0993645191192627, "logps/chosen": -2.586780071258545, "logps/rejected": -4.8038177490234375, "loss": 0.6473, "nll_loss": 0.6316229104995728, "rewards/accuracies": 1.0, "rewards/chosen": -0.25867801904678345, "rewards/margins": 0.22170376777648926, "rewards/rejected": -0.4803817868232727, "step": 6724 }, { "epoch": 18.412046543463383, "grad_norm": 3.8510830402374268, "learning_rate": 7.876712328767122e-08, "log_odds_chosen": 2.160074472427368, "log_odds_ratio": -0.21979019045829773, "logits/chosen": 1.013904333114624, "logits/rejected": 0.9619355201721191, "logps/chosen": -1.3258492946624756, "logps/rejected": -3.205364227294922, "loss": 0.4814, "nll_loss": 0.459439218044281, "rewards/accuracies": 1.0, "rewards/chosen": -0.13258492946624756, "rewards/margins": 0.1879514902830124, "rewards/rejected": -0.32053643465042114, "step": 6725 }, { "epoch": 18.414784394250514, "grad_norm": 5.43516206741333, "learning_rate": 7.863013698630136e-08, "log_odds_chosen": 2.4409451484680176, "log_odds_ratio": -0.21924127638339996, "logits/chosen": 1.1988564729690552, "logits/rejected": 1.144782543182373, "logps/chosen": -1.910760521888733, "logps/rejected": -4.147648811340332, "loss": 0.5454, "nll_loss": 0.5234537124633789, "rewards/accuracies": 0.875, "rewards/chosen": -0.19107605516910553, "rewards/margins": 0.22368884086608887, "rewards/rejected": -0.4147648811340332, "step": 6726 }, { "epoch": 18.417522245037645, "grad_norm": 3.9227654933929443, "learning_rate": 7.84931506849315e-08, "log_odds_chosen": 3.248853921890259, "log_odds_ratio": -0.28022652864456177, "logits/chosen": 1.1917027235031128, "logits/rejected": 1.1850923299789429, "logps/chosen": -2.2710301876068115, "logps/rejected": -5.377742767333984, "loss": 0.5525, "nll_loss": 0.5244888067245483, "rewards/accuracies": 0.875, "rewards/chosen": -0.22710302472114563, "rewards/margins": 0.31067126989364624, "rewards/rejected": -0.5377743244171143, "step": 6727 }, { "epoch": 18.420260095824776, "grad_norm": 4.672009468078613, "learning_rate": 7.835616438356164e-08, "log_odds_chosen": 5.106588840484619, "log_odds_ratio": -0.24026384949684143, "logits/chosen": 1.172827959060669, "logits/rejected": 1.1944655179977417, "logps/chosen": -1.815507411956787, "logps/rejected": -6.731762886047363, "loss": 0.5444, "nll_loss": 0.5204082727432251, "rewards/accuracies": 0.875, "rewards/chosen": -0.1815507560968399, "rewards/margins": 0.4916255474090576, "rewards/rejected": -0.6731762886047363, "step": 6728 }, { "epoch": 18.42299794661191, "grad_norm": 4.057374000549316, "learning_rate": 7.821917808219178e-08, "log_odds_chosen": 2.3448727130889893, "log_odds_ratio": -0.24591633677482605, "logits/chosen": 1.1549078226089478, "logits/rejected": 1.1803306341171265, "logps/chosen": -2.210824966430664, "logps/rejected": -4.476085662841797, "loss": 0.5097, "nll_loss": 0.48510217666625977, "rewards/accuracies": 0.875, "rewards/chosen": -0.22108250856399536, "rewards/margins": 0.22652606666088104, "rewards/rejected": -0.4476085901260376, "step": 6729 }, { "epoch": 18.425735797399042, "grad_norm": 3.7231388092041016, "learning_rate": 7.808219178082192e-08, "log_odds_chosen": 3.8084378242492676, "log_odds_ratio": -0.23536261916160583, "logits/chosen": 1.023597002029419, "logits/rejected": 1.0361510515213013, "logps/chosen": -1.6201956272125244, "logps/rejected": -5.2125701904296875, "loss": 0.4998, "nll_loss": 0.47623270750045776, "rewards/accuracies": 1.0, "rewards/chosen": -0.16201958060264587, "rewards/margins": 0.3592374622821808, "rewards/rejected": -0.5212570428848267, "step": 6730 }, { "epoch": 18.428473648186174, "grad_norm": 3.5712997913360596, "learning_rate": 7.794520547945204e-08, "log_odds_chosen": 4.072152137756348, "log_odds_ratio": -0.14964210987091064, "logits/chosen": 0.9823923110961914, "logits/rejected": 0.9780721068382263, "logps/chosen": -1.7090508937835693, "logps/rejected": -5.604147911071777, "loss": 0.5092, "nll_loss": 0.49425995349884033, "rewards/accuracies": 1.0, "rewards/chosen": -0.17090511322021484, "rewards/margins": 0.3895096778869629, "rewards/rejected": -0.5604147911071777, "step": 6731 }, { "epoch": 18.431211498973305, "grad_norm": 3.4719927310943604, "learning_rate": 7.780821917808218e-08, "log_odds_chosen": 3.0930070877075195, "log_odds_ratio": -0.14671510457992554, "logits/chosen": 1.1636861562728882, "logits/rejected": 1.22654128074646, "logps/chosen": -2.2836434841156006, "logps/rejected": -5.214974403381348, "loss": 0.5408, "nll_loss": 0.5261239409446716, "rewards/accuracies": 1.0, "rewards/chosen": -0.22836434841156006, "rewards/margins": 0.29313310980796814, "rewards/rejected": -0.5214974880218506, "step": 6732 }, { "epoch": 18.43394934976044, "grad_norm": 3.7645413875579834, "learning_rate": 7.767123287671234e-08, "log_odds_chosen": 2.4615299701690674, "log_odds_ratio": -0.2864741086959839, "logits/chosen": 1.1130033731460571, "logits/rejected": 1.1007449626922607, "logps/chosen": -1.93595290184021, "logps/rejected": -4.274165153503418, "loss": 0.5238, "nll_loss": 0.49516239762306213, "rewards/accuracies": 0.875, "rewards/chosen": -0.193595290184021, "rewards/margins": 0.23382122814655304, "rewards/rejected": -0.42741653323173523, "step": 6733 }, { "epoch": 18.43668720054757, "grad_norm": 4.371334075927734, "learning_rate": 7.753424657534246e-08, "log_odds_chosen": 2.6790266036987305, "log_odds_ratio": -0.20166297256946564, "logits/chosen": 1.1828250885009766, "logits/rejected": 1.2426999807357788, "logps/chosen": -2.191376209259033, "logps/rejected": -4.74976921081543, "loss": 0.5619, "nll_loss": 0.5417687892913818, "rewards/accuracies": 1.0, "rewards/chosen": -0.21913760900497437, "rewards/margins": 0.25583934783935547, "rewards/rejected": -0.4749769866466522, "step": 6734 }, { "epoch": 18.439425051334702, "grad_norm": 5.698558807373047, "learning_rate": 7.73972602739726e-08, "log_odds_chosen": 2.861445426940918, "log_odds_ratio": -0.350063681602478, "logits/chosen": 1.0772202014923096, "logits/rejected": 1.0885635614395142, "logps/chosen": -2.7471601963043213, "logps/rejected": -5.4498515129089355, "loss": 0.6481, "nll_loss": 0.6130734086036682, "rewards/accuracies": 0.875, "rewards/chosen": -0.27471601963043213, "rewards/margins": 0.27026912569999695, "rewards/rejected": -0.5449851751327515, "step": 6735 }, { "epoch": 18.442162902121833, "grad_norm": 3.9028098583221436, "learning_rate": 7.726027397260274e-08, "log_odds_chosen": 3.1903860569000244, "log_odds_ratio": -0.3061676621437073, "logits/chosen": 0.9864886403083801, "logits/rejected": 1.0625649690628052, "logps/chosen": -2.3655080795288086, "logps/rejected": -5.451128005981445, "loss": 0.659, "nll_loss": 0.6283903121948242, "rewards/accuracies": 0.875, "rewards/chosen": -0.23655080795288086, "rewards/margins": 0.3085619807243347, "rewards/rejected": -0.5451127290725708, "step": 6736 }, { "epoch": 18.444900752908968, "grad_norm": 7.063562870025635, "learning_rate": 7.712328767123286e-08, "log_odds_chosen": 2.779876708984375, "log_odds_ratio": -0.20085817575454712, "logits/chosen": 1.0804760456085205, "logits/rejected": 1.1124458312988281, "logps/chosen": -2.5086965560913086, "logps/rejected": -5.156252861022949, "loss": 0.718, "nll_loss": 0.6979004144668579, "rewards/accuracies": 0.875, "rewards/chosen": -0.25086966156959534, "rewards/margins": 0.26475563645362854, "rewards/rejected": -0.5156252980232239, "step": 6737 }, { "epoch": 18.4476386036961, "grad_norm": 8.010315895080566, "learning_rate": 7.6986301369863e-08, "log_odds_chosen": 3.3055005073547363, "log_odds_ratio": -0.32766276597976685, "logits/chosen": 1.027172565460205, "logits/rejected": 1.0392115116119385, "logps/chosen": -3.1933341026306152, "logps/rejected": -6.369177341461182, "loss": 0.6508, "nll_loss": 0.6180168390274048, "rewards/accuracies": 0.875, "rewards/chosen": -0.31933343410491943, "rewards/margins": 0.3175842761993408, "rewards/rejected": -0.6369177103042603, "step": 6738 }, { "epoch": 18.45037645448323, "grad_norm": 4.846493244171143, "learning_rate": 7.684931506849316e-08, "log_odds_chosen": 2.118584394454956, "log_odds_ratio": -0.26895177364349365, "logits/chosen": 1.0289971828460693, "logits/rejected": 0.9487097859382629, "logps/chosen": -1.2523465156555176, "logps/rejected": -3.0304245948791504, "loss": 0.5061, "nll_loss": 0.47925055027008057, "rewards/accuracies": 1.0, "rewards/chosen": -0.1252346634864807, "rewards/margins": 0.1778077930212021, "rewards/rejected": -0.3030424416065216, "step": 6739 }, { "epoch": 18.45311430527036, "grad_norm": 3.307363510131836, "learning_rate": 7.67123287671233e-08, "log_odds_chosen": 2.5328431129455566, "log_odds_ratio": -0.16979901492595673, "logits/chosen": 1.1351019144058228, "logits/rejected": 1.1190135478973389, "logps/chosen": -1.4386811256408691, "logps/rejected": -3.7281904220581055, "loss": 0.4405, "nll_loss": 0.42350369691848755, "rewards/accuracies": 1.0, "rewards/chosen": -0.1438681185245514, "rewards/margins": 0.22895094752311707, "rewards/rejected": -0.37281906604766846, "step": 6740 }, { "epoch": 18.455852156057496, "grad_norm": 4.283796787261963, "learning_rate": 7.657534246575342e-08, "log_odds_chosen": 4.662544250488281, "log_odds_ratio": -0.074039027094841, "logits/chosen": 0.9560213088989258, "logits/rejected": 1.0054453611373901, "logps/chosen": -2.097728967666626, "logps/rejected": -6.57763671875, "loss": 0.6218, "nll_loss": 0.6143594980239868, "rewards/accuracies": 1.0, "rewards/chosen": -0.20977291464805603, "rewards/margins": 0.4479908049106598, "rewards/rejected": -0.6577637195587158, "step": 6741 }, { "epoch": 18.458590006844627, "grad_norm": 3.593825340270996, "learning_rate": 7.643835616438356e-08, "log_odds_chosen": 2.0174708366394043, "log_odds_ratio": -0.24095450341701508, "logits/chosen": 1.0994980335235596, "logits/rejected": 1.111943006515503, "logps/chosen": -1.8460900783538818, "logps/rejected": -3.700038433074951, "loss": 0.5319, "nll_loss": 0.5077798366546631, "rewards/accuracies": 1.0, "rewards/chosen": -0.18460901081562042, "rewards/margins": 0.18539488315582275, "rewards/rejected": -0.370003879070282, "step": 6742 }, { "epoch": 18.46132785763176, "grad_norm": 3.37011456489563, "learning_rate": 7.63013698630137e-08, "log_odds_chosen": 2.553992509841919, "log_odds_ratio": -0.12927885353565216, "logits/chosen": 1.2628231048583984, "logits/rejected": 1.2914570569992065, "logps/chosen": -1.6857903003692627, "logps/rejected": -4.043180465698242, "loss": 0.5469, "nll_loss": 0.5339251160621643, "rewards/accuracies": 1.0, "rewards/chosen": -0.16857904195785522, "rewards/margins": 0.23573897778987885, "rewards/rejected": -0.4043180048465729, "step": 6743 }, { "epoch": 18.46406570841889, "grad_norm": 4.0255208015441895, "learning_rate": 7.616438356164382e-08, "log_odds_chosen": 1.1231772899627686, "log_odds_ratio": -0.3590310215950012, "logits/chosen": 1.1651711463928223, "logits/rejected": 1.0846906900405884, "logps/chosen": -1.8265522718429565, "logps/rejected": -2.745150566101074, "loss": 0.4298, "nll_loss": 0.3939375877380371, "rewards/accuracies": 1.0, "rewards/chosen": -0.1826552301645279, "rewards/margins": 0.09185981750488281, "rewards/rejected": -0.2745150625705719, "step": 6744 }, { "epoch": 18.466803559206024, "grad_norm": 2.9641876220703125, "learning_rate": 7.602739726027398e-08, "log_odds_chosen": 3.6894960403442383, "log_odds_ratio": -0.18635332584381104, "logits/chosen": 0.9467258453369141, "logits/rejected": 0.9982626438140869, "logps/chosen": -1.2273820638656616, "logps/rejected": -4.650733947753906, "loss": 0.4261, "nll_loss": 0.40750131011009216, "rewards/accuracies": 1.0, "rewards/chosen": -0.12273820489645004, "rewards/margins": 0.34233519434928894, "rewards/rejected": -0.4650734066963196, "step": 6745 }, { "epoch": 18.469541409993155, "grad_norm": 3.803449869155884, "learning_rate": 7.589041095890411e-08, "log_odds_chosen": 4.986970901489258, "log_odds_ratio": -0.1985766589641571, "logits/chosen": 0.986281156539917, "logits/rejected": 1.0402281284332275, "logps/chosen": -1.6454601287841797, "logps/rejected": -6.428955078125, "loss": 0.5559, "nll_loss": 0.5360362529754639, "rewards/accuracies": 0.875, "rewards/chosen": -0.16454601287841797, "rewards/margins": 0.478349506855011, "rewards/rejected": -0.6428954601287842, "step": 6746 }, { "epoch": 18.472279260780287, "grad_norm": 3.682157039642334, "learning_rate": 7.575342465753424e-08, "log_odds_chosen": 2.908465623855591, "log_odds_ratio": -0.14925682544708252, "logits/chosen": 1.0168249607086182, "logits/rejected": 1.0214509963989258, "logps/chosen": -2.140672445297241, "logps/rejected": -4.836504936218262, "loss": 0.5024, "nll_loss": 0.4874586760997772, "rewards/accuracies": 1.0, "rewards/chosen": -0.2140672653913498, "rewards/margins": 0.26958322525024414, "rewards/rejected": -0.4836505055427551, "step": 6747 }, { "epoch": 18.47501711156742, "grad_norm": 7.132652282714844, "learning_rate": 7.561643835616438e-08, "log_odds_chosen": 1.3365353345870972, "log_odds_ratio": -0.45647791028022766, "logits/chosen": 1.0340025424957275, "logits/rejected": 0.996113657951355, "logps/chosen": -2.0217018127441406, "logps/rejected": -3.237196445465088, "loss": 0.5642, "nll_loss": 0.5185567140579224, "rewards/accuracies": 0.875, "rewards/chosen": -0.2021702080965042, "rewards/margins": 0.12154944241046906, "rewards/rejected": -0.3237196207046509, "step": 6748 }, { "epoch": 18.477754962354553, "grad_norm": 3.36021089553833, "learning_rate": 7.547945205479452e-08, "log_odds_chosen": 2.828934669494629, "log_odds_ratio": -0.2173212468624115, "logits/chosen": 1.0895086526870728, "logits/rejected": 1.0606846809387207, "logps/chosen": -1.3028775453567505, "logps/rejected": -3.780275821685791, "loss": 0.4356, "nll_loss": 0.41385602951049805, "rewards/accuracies": 1.0, "rewards/chosen": -0.1302877515554428, "rewards/margins": 0.24773983657360077, "rewards/rejected": -0.37802761793136597, "step": 6749 }, { "epoch": 18.480492813141684, "grad_norm": 3.2654800415039062, "learning_rate": 7.534246575342466e-08, "log_odds_chosen": 2.616875171661377, "log_odds_ratio": -0.1527239829301834, "logits/chosen": 1.3424949645996094, "logits/rejected": 1.2646769285202026, "logps/chosen": -1.4616341590881348, "logps/rejected": -3.8402719497680664, "loss": 0.4102, "nll_loss": 0.39492475986480713, "rewards/accuracies": 1.0, "rewards/chosen": -0.14616340398788452, "rewards/margins": 0.23786379396915436, "rewards/rejected": -0.3840271830558777, "step": 6750 }, { "epoch": 18.483230663928815, "grad_norm": 3.879425048828125, "learning_rate": 7.520547945205478e-08, "log_odds_chosen": 2.165982961654663, "log_odds_ratio": -0.2375163733959198, "logits/chosen": 1.054771900177002, "logits/rejected": 1.072980523109436, "logps/chosen": -2.5684261322021484, "logps/rejected": -4.621652603149414, "loss": 0.6192, "nll_loss": 0.5954631567001343, "rewards/accuracies": 0.875, "rewards/chosen": -0.25684261322021484, "rewards/margins": 0.20532262325286865, "rewards/rejected": -0.4621652662754059, "step": 6751 }, { "epoch": 18.48596851471595, "grad_norm": 3.409727096557617, "learning_rate": 7.506849315068493e-08, "log_odds_chosen": 2.527233600616455, "log_odds_ratio": -0.12364797294139862, "logits/chosen": 0.7984716296195984, "logits/rejected": 0.8045281767845154, "logps/chosen": -1.8798890113830566, "logps/rejected": -4.198907852172852, "loss": 0.5816, "nll_loss": 0.5692048668861389, "rewards/accuracies": 1.0, "rewards/chosen": -0.18798890709877014, "rewards/margins": 0.23190192878246307, "rewards/rejected": -0.41989079117774963, "step": 6752 }, { "epoch": 18.48870636550308, "grad_norm": 3.7951338291168213, "learning_rate": 7.493150684931507e-08, "log_odds_chosen": 4.932372093200684, "log_odds_ratio": -0.11001206934452057, "logits/chosen": 1.2498111724853516, "logits/rejected": 1.2931547164916992, "logps/chosen": -1.8924566507339478, "logps/rejected": -6.5791168212890625, "loss": 0.5755, "nll_loss": 0.5645025968551636, "rewards/accuracies": 1.0, "rewards/chosen": -0.18924567103385925, "rewards/margins": 0.46866607666015625, "rewards/rejected": -0.6579117178916931, "step": 6753 }, { "epoch": 18.491444216290212, "grad_norm": 8.38389778137207, "learning_rate": 7.47945205479452e-08, "log_odds_chosen": 1.943101167678833, "log_odds_ratio": -0.30955377221107483, "logits/chosen": 1.1597163677215576, "logits/rejected": 1.2121459245681763, "logps/chosen": -2.873718500137329, "logps/rejected": -4.751476287841797, "loss": 0.7181, "nll_loss": 0.6871151328086853, "rewards/accuracies": 0.875, "rewards/chosen": -0.28737184405326843, "rewards/margins": 0.18777576088905334, "rewards/rejected": -0.47514763474464417, "step": 6754 }, { "epoch": 18.494182067077343, "grad_norm": 3.9341604709625244, "learning_rate": 7.465753424657534e-08, "log_odds_chosen": 2.8597655296325684, "log_odds_ratio": -0.22012916207313538, "logits/chosen": 1.2888426780700684, "logits/rejected": 1.2286700010299683, "logps/chosen": -1.5383658409118652, "logps/rejected": -4.182506561279297, "loss": 0.51, "nll_loss": 0.48803579807281494, "rewards/accuracies": 0.875, "rewards/chosen": -0.15383657813072205, "rewards/margins": 0.26441410183906555, "rewards/rejected": -0.4182506799697876, "step": 6755 }, { "epoch": 18.496919917864478, "grad_norm": 4.526081085205078, "learning_rate": 7.452054794520548e-08, "log_odds_chosen": 4.333499908447266, "log_odds_ratio": -0.3998975455760956, "logits/chosen": 0.8800843358039856, "logits/rejected": 0.90971440076828, "logps/chosen": -2.230548858642578, "logps/rejected": -6.350002765655518, "loss": 0.599, "nll_loss": 0.5590598583221436, "rewards/accuracies": 0.875, "rewards/chosen": -0.2230548858642578, "rewards/margins": 0.4119453728199005, "rewards/rejected": -0.6350002884864807, "step": 6756 }, { "epoch": 18.49965776865161, "grad_norm": 3.7735469341278076, "learning_rate": 7.43835616438356e-08, "log_odds_chosen": 2.9631669521331787, "log_odds_ratio": -0.16499587893486023, "logits/chosen": 1.0855684280395508, "logits/rejected": 1.057936668395996, "logps/chosen": -2.5444185733795166, "logps/rejected": -5.343790054321289, "loss": 0.4891, "nll_loss": 0.47262874245643616, "rewards/accuracies": 1.0, "rewards/chosen": -0.25444185733795166, "rewards/margins": 0.27993708848953247, "rewards/rejected": -0.5343790054321289, "step": 6757 }, { "epoch": 18.50239561943874, "grad_norm": 4.58544921875, "learning_rate": 7.424657534246575e-08, "log_odds_chosen": 2.4638988971710205, "log_odds_ratio": -0.2820541262626648, "logits/chosen": 1.3195562362670898, "logits/rejected": 1.3859502077102661, "logps/chosen": -2.494843006134033, "logps/rejected": -4.7931342124938965, "loss": 0.5919, "nll_loss": 0.5636597871780396, "rewards/accuracies": 0.875, "rewards/chosen": -0.24948430061340332, "rewards/margins": 0.22982916235923767, "rewards/rejected": -0.4793134331703186, "step": 6758 }, { "epoch": 18.50513347022587, "grad_norm": 4.677412033081055, "learning_rate": 7.410958904109589e-08, "log_odds_chosen": 2.626284599304199, "log_odds_ratio": -0.2599579989910126, "logits/chosen": 1.478506088256836, "logits/rejected": 1.4506696462631226, "logps/chosen": -2.133445978164673, "logps/rejected": -4.5982866287231445, "loss": 0.5957, "nll_loss": 0.5697092413902283, "rewards/accuracies": 1.0, "rewards/chosen": -0.21334461867809296, "rewards/margins": 0.24648404121398926, "rewards/rejected": -0.459828644990921, "step": 6759 }, { "epoch": 18.507871321013006, "grad_norm": 3.876220941543579, "learning_rate": 7.397260273972603e-08, "log_odds_chosen": 2.2156076431274414, "log_odds_ratio": -0.2545185089111328, "logits/chosen": 1.4249167442321777, "logits/rejected": 1.2909698486328125, "logps/chosen": -1.6271849870681763, "logps/rejected": -3.676760196685791, "loss": 0.4323, "nll_loss": 0.40683218836784363, "rewards/accuracies": 1.0, "rewards/chosen": -0.1627185046672821, "rewards/margins": 0.20495754480361938, "rewards/rejected": -0.3676760494709015, "step": 6760 }, { "epoch": 18.510609171800137, "grad_norm": 8.217996597290039, "learning_rate": 7.383561643835616e-08, "log_odds_chosen": 1.0375486612319946, "log_odds_ratio": -0.4036519527435303, "logits/chosen": 1.162649154663086, "logits/rejected": 1.1692794561386108, "logps/chosen": -1.9147917032241821, "logps/rejected": -2.7895119190216064, "loss": 0.5902, "nll_loss": 0.5497973561286926, "rewards/accuracies": 0.875, "rewards/chosen": -0.19147919118404388, "rewards/margins": 0.08747202157974243, "rewards/rejected": -0.2789511978626251, "step": 6761 }, { "epoch": 18.51334702258727, "grad_norm": 7.9163312911987305, "learning_rate": 7.36986301369863e-08, "log_odds_chosen": 1.6249189376831055, "log_odds_ratio": -0.7808972597122192, "logits/chosen": 1.1108176708221436, "logits/rejected": 1.1134910583496094, "logps/chosen": -2.1178226470947266, "logps/rejected": -3.490269184112549, "loss": 0.6805, "nll_loss": 0.602432131767273, "rewards/accuracies": 0.875, "rewards/chosen": -0.21178226172924042, "rewards/margins": 0.13724465668201447, "rewards/rejected": -0.34902694821357727, "step": 6762 }, { "epoch": 18.5160848733744, "grad_norm": 3.1865458488464355, "learning_rate": 7.356164383561643e-08, "log_odds_chosen": 2.697680950164795, "log_odds_ratio": -0.19307628273963928, "logits/chosen": 1.037366271018982, "logits/rejected": 0.999718427658081, "logps/chosen": -1.6014504432678223, "logps/rejected": -3.973823070526123, "loss": 0.5169, "nll_loss": 0.49756690859794617, "rewards/accuracies": 1.0, "rewards/chosen": -0.16014505922794342, "rewards/margins": 0.23723727464675903, "rewards/rejected": -0.39738231897354126, "step": 6763 }, { "epoch": 18.518822724161534, "grad_norm": 6.1284379959106445, "learning_rate": 7.342465753424657e-08, "log_odds_chosen": 3.712503671646118, "log_odds_ratio": -0.2395043522119522, "logits/chosen": 1.2139956951141357, "logits/rejected": 1.2237051725387573, "logps/chosen": -2.4689712524414062, "logps/rejected": -5.998991012573242, "loss": 0.6625, "nll_loss": 0.6385393738746643, "rewards/accuracies": 0.75, "rewards/chosen": -0.2468971312046051, "rewards/margins": 0.3530019223690033, "rewards/rejected": -0.5998990535736084, "step": 6764 }, { "epoch": 18.521560574948666, "grad_norm": 4.590527534484863, "learning_rate": 7.328767123287671e-08, "log_odds_chosen": 3.313642740249634, "log_odds_ratio": -0.31144964694976807, "logits/chosen": 1.3019343614578247, "logits/rejected": 1.3409273624420166, "logps/chosen": -2.0097291469573975, "logps/rejected": -5.163387775421143, "loss": 0.6069, "nll_loss": 0.5757499933242798, "rewards/accuracies": 0.875, "rewards/chosen": -0.20097291469573975, "rewards/margins": 0.31536585092544556, "rewards/rejected": -0.5163387656211853, "step": 6765 }, { "epoch": 18.524298425735797, "grad_norm": 3.7629692554473877, "learning_rate": 7.315068493150685e-08, "log_odds_chosen": 2.354074239730835, "log_odds_ratio": -0.21472996473312378, "logits/chosen": 1.1645174026489258, "logits/rejected": 1.1610537767410278, "logps/chosen": -1.2359216213226318, "logps/rejected": -3.280989170074463, "loss": 0.3962, "nll_loss": 0.3747674226760864, "rewards/accuracies": 1.0, "rewards/chosen": -0.12359216064214706, "rewards/margins": 0.2045067399740219, "rewards/rejected": -0.32809892296791077, "step": 6766 }, { "epoch": 18.527036276522928, "grad_norm": 5.467573642730713, "learning_rate": 7.301369863013698e-08, "log_odds_chosen": 2.9791007041931152, "log_odds_ratio": -0.181069016456604, "logits/chosen": 0.8802807927131653, "logits/rejected": 0.8511218428611755, "logps/chosen": -2.405819892883301, "logps/rejected": -5.228977203369141, "loss": 0.5514, "nll_loss": 0.5333355069160461, "rewards/accuracies": 1.0, "rewards/chosen": -0.24058201909065247, "rewards/margins": 0.282315731048584, "rewards/rejected": -0.5228977203369141, "step": 6767 }, { "epoch": 18.529774127310063, "grad_norm": 3.3849799633026123, "learning_rate": 7.287671232876712e-08, "log_odds_chosen": 1.9532490968704224, "log_odds_ratio": -0.18952859938144684, "logits/chosen": 0.9631686210632324, "logits/rejected": 0.9874776005744934, "logps/chosen": -1.671197533607483, "logps/rejected": -3.381849527359009, "loss": 0.4832, "nll_loss": 0.4642372727394104, "rewards/accuracies": 1.0, "rewards/chosen": -0.16711977124214172, "rewards/margins": 0.17106521129608154, "rewards/rejected": -0.3381849527359009, "step": 6768 }, { "epoch": 18.532511978097194, "grad_norm": 3.692108631134033, "learning_rate": 7.273972602739725e-08, "log_odds_chosen": 3.2883224487304688, "log_odds_ratio": -0.15256208181381226, "logits/chosen": 1.2509196996688843, "logits/rejected": 1.2519986629486084, "logps/chosen": -1.5454384088516235, "logps/rejected": -4.618786334991455, "loss": 0.4735, "nll_loss": 0.4582575559616089, "rewards/accuracies": 1.0, "rewards/chosen": -0.15454384684562683, "rewards/margins": 0.3073347210884094, "rewards/rejected": -0.46187856793403625, "step": 6769 }, { "epoch": 18.535249828884325, "grad_norm": 6.984120845794678, "learning_rate": 7.260273972602739e-08, "log_odds_chosen": 2.745683193206787, "log_odds_ratio": -0.28182023763656616, "logits/chosen": 1.286200761795044, "logits/rejected": 1.2931631803512573, "logps/chosen": -2.0080502033233643, "logps/rejected": -4.536769866943359, "loss": 0.6393, "nll_loss": 0.6110891103744507, "rewards/accuracies": 0.875, "rewards/chosen": -0.20080502331256866, "rewards/margins": 0.2528719902038574, "rewards/rejected": -0.4536769986152649, "step": 6770 }, { "epoch": 18.537987679671456, "grad_norm": 3.475661277770996, "learning_rate": 7.246575342465753e-08, "log_odds_chosen": 3.5459585189819336, "log_odds_ratio": -0.10010385513305664, "logits/chosen": 1.0181294679641724, "logits/rejected": 1.0343281030654907, "logps/chosen": -1.7418549060821533, "logps/rejected": -5.066900730133057, "loss": 0.478, "nll_loss": 0.4679698944091797, "rewards/accuracies": 1.0, "rewards/chosen": -0.17418546974658966, "rewards/margins": 0.3325045704841614, "rewards/rejected": -0.5066900253295898, "step": 6771 }, { "epoch": 18.54072553045859, "grad_norm": 4.522558212280273, "learning_rate": 7.232876712328767e-08, "log_odds_chosen": 3.2966322898864746, "log_odds_ratio": -0.22241470217704773, "logits/chosen": 1.113325595855713, "logits/rejected": 1.0821202993392944, "logps/chosen": -1.8798730373382568, "logps/rejected": -5.004775047302246, "loss": 0.619, "nll_loss": 0.5967522859573364, "rewards/accuracies": 1.0, "rewards/chosen": -0.1879872828722, "rewards/margins": 0.31249019503593445, "rewards/rejected": -0.5004774928092957, "step": 6772 }, { "epoch": 18.543463381245722, "grad_norm": 8.261388778686523, "learning_rate": 7.219178082191781e-08, "log_odds_chosen": 4.022580623626709, "log_odds_ratio": -0.10722732543945312, "logits/chosen": 0.969366192817688, "logits/rejected": 0.9326616525650024, "logps/chosen": -1.858971357345581, "logps/rejected": -5.649396896362305, "loss": 0.5857, "nll_loss": 0.5750221610069275, "rewards/accuracies": 1.0, "rewards/chosen": -0.18589714169502258, "rewards/margins": 0.3790425658226013, "rewards/rejected": -0.5649396777153015, "step": 6773 }, { "epoch": 18.546201232032853, "grad_norm": 4.984898567199707, "learning_rate": 7.205479452054794e-08, "log_odds_chosen": 2.771760940551758, "log_odds_ratio": -0.19661785662174225, "logits/chosen": 1.0358961820602417, "logits/rejected": 0.993742823600769, "logps/chosen": -1.6704578399658203, "logps/rejected": -4.197728157043457, "loss": 0.5187, "nll_loss": 0.49902281165122986, "rewards/accuracies": 1.0, "rewards/chosen": -0.16704578697681427, "rewards/margins": 0.25272703170776367, "rewards/rejected": -0.41977283358573914, "step": 6774 }, { "epoch": 18.548939082819984, "grad_norm": 5.936652660369873, "learning_rate": 7.191780821917807e-08, "log_odds_chosen": 1.1386373043060303, "log_odds_ratio": -0.3508431017398834, "logits/chosen": 0.9543353915214539, "logits/rejected": 0.9114354848861694, "logps/chosen": -2.5973944664001465, "logps/rejected": -3.539185047149658, "loss": 0.5234, "nll_loss": 0.48829007148742676, "rewards/accuracies": 0.875, "rewards/chosen": -0.2597394585609436, "rewards/margins": 0.09417907148599625, "rewards/rejected": -0.35391849279403687, "step": 6775 }, { "epoch": 18.55167693360712, "grad_norm": 3.4373857975006104, "learning_rate": 7.178082191780821e-08, "log_odds_chosen": 3.969635009765625, "log_odds_ratio": -0.06457038223743439, "logits/chosen": 1.4186697006225586, "logits/rejected": 1.3737714290618896, "logps/chosen": -1.6967605352401733, "logps/rejected": -5.441057205200195, "loss": 0.5362, "nll_loss": 0.529778778553009, "rewards/accuracies": 1.0, "rewards/chosen": -0.1696760505437851, "rewards/margins": 0.37442970275878906, "rewards/rejected": -0.5441057682037354, "step": 6776 }, { "epoch": 18.55441478439425, "grad_norm": 12.299821853637695, "learning_rate": 7.164383561643835e-08, "log_odds_chosen": 3.445767402648926, "log_odds_ratio": -0.2888372838497162, "logits/chosen": 1.0158604383468628, "logits/rejected": 0.9615672826766968, "logps/chosen": -1.9955358505249023, "logps/rejected": -5.156667709350586, "loss": 0.5652, "nll_loss": 0.5363284349441528, "rewards/accuracies": 0.875, "rewards/chosen": -0.19955357909202576, "rewards/margins": 0.3161131739616394, "rewards/rejected": -0.5156667828559875, "step": 6777 }, { "epoch": 18.55715263518138, "grad_norm": 3.9479308128356934, "learning_rate": 7.150684931506849e-08, "log_odds_chosen": 2.8458356857299805, "log_odds_ratio": -0.1623132824897766, "logits/chosen": 1.1126047372817993, "logits/rejected": 1.121972680091858, "logps/chosen": -1.599414348602295, "logps/rejected": -4.2587995529174805, "loss": 0.496, "nll_loss": 0.47975248098373413, "rewards/accuracies": 1.0, "rewards/chosen": -0.1599414348602295, "rewards/margins": 0.26593852043151855, "rewards/rejected": -0.42587995529174805, "step": 6778 }, { "epoch": 18.559890485968516, "grad_norm": 3.860750675201416, "learning_rate": 7.136986301369863e-08, "log_odds_chosen": 2.8733556270599365, "log_odds_ratio": -0.10419610142707825, "logits/chosen": 1.091444730758667, "logits/rejected": 1.0300288200378418, "logps/chosen": -2.476260185241699, "logps/rejected": -5.217177391052246, "loss": 0.6085, "nll_loss": 0.5980949997901917, "rewards/accuracies": 1.0, "rewards/chosen": -0.24762600660324097, "rewards/margins": 0.2740917503833771, "rewards/rejected": -0.5217177867889404, "step": 6779 }, { "epoch": 18.562628336755647, "grad_norm": 9.575799942016602, "learning_rate": 7.123287671232877e-08, "log_odds_chosen": 2.558689832687378, "log_odds_ratio": -0.35423263907432556, "logits/chosen": 1.2088873386383057, "logits/rejected": 1.2871031761169434, "logps/chosen": -2.557393789291382, "logps/rejected": -4.992683410644531, "loss": 0.6539, "nll_loss": 0.6185087561607361, "rewards/accuracies": 0.875, "rewards/chosen": -0.25573939085006714, "rewards/margins": 0.24352899193763733, "rewards/rejected": -0.49926838278770447, "step": 6780 }, { "epoch": 18.56536618754278, "grad_norm": 3.566316604614258, "learning_rate": 7.10958904109589e-08, "log_odds_chosen": 2.4748759269714355, "log_odds_ratio": -0.19302260875701904, "logits/chosen": 0.9670522212982178, "logits/rejected": 1.0900989770889282, "logps/chosen": -1.7950994968414307, "logps/rejected": -4.116556644439697, "loss": 0.5797, "nll_loss": 0.5604196786880493, "rewards/accuracies": 1.0, "rewards/chosen": -0.1795099377632141, "rewards/margins": 0.23214571177959442, "rewards/rejected": -0.4116556644439697, "step": 6781 }, { "epoch": 18.56810403832991, "grad_norm": 3.445572853088379, "learning_rate": 7.095890410958903e-08, "log_odds_chosen": 3.4728963375091553, "log_odds_ratio": -0.16907721757888794, "logits/chosen": 1.0018292665481567, "logits/rejected": 0.9791229963302612, "logps/chosen": -1.9073762893676758, "logps/rejected": -5.231352806091309, "loss": 0.5356, "nll_loss": 0.5186821222305298, "rewards/accuracies": 1.0, "rewards/chosen": -0.19073761999607086, "rewards/margins": 0.3323976695537567, "rewards/rejected": -0.5231353044509888, "step": 6782 }, { "epoch": 18.570841889117045, "grad_norm": 10.780527114868164, "learning_rate": 7.082191780821918e-08, "log_odds_chosen": 1.834735631942749, "log_odds_ratio": -0.3325880169868469, "logits/chosen": 0.9353711009025574, "logits/rejected": 0.8600783944129944, "logps/chosen": -2.2187178134918213, "logps/rejected": -3.861431360244751, "loss": 0.514, "nll_loss": 0.4807480573654175, "rewards/accuracies": 0.875, "rewards/chosen": -0.2218717783689499, "rewards/margins": 0.16427135467529297, "rewards/rejected": -0.38614314794540405, "step": 6783 }, { "epoch": 18.573579739904176, "grad_norm": 3.7156848907470703, "learning_rate": 7.068493150684931e-08, "log_odds_chosen": 6.047207832336426, "log_odds_ratio": -0.11666058003902435, "logits/chosen": 0.8893032670021057, "logits/rejected": 0.8634331226348877, "logps/chosen": -2.3123457431793213, "logps/rejected": -8.197293281555176, "loss": 0.5688, "nll_loss": 0.5570990443229675, "rewards/accuracies": 1.0, "rewards/chosen": -0.2312345802783966, "rewards/margins": 0.5884947776794434, "rewards/rejected": -0.8197293281555176, "step": 6784 }, { "epoch": 18.576317590691307, "grad_norm": 6.029049873352051, "learning_rate": 7.054794520547945e-08, "log_odds_chosen": 3.0613749027252197, "log_odds_ratio": -0.19899752736091614, "logits/chosen": 1.0491595268249512, "logits/rejected": 0.9782212972640991, "logps/chosen": -1.9720284938812256, "logps/rejected": -4.819703102111816, "loss": 0.5395, "nll_loss": 0.5195695161819458, "rewards/accuracies": 1.0, "rewards/chosen": -0.1972028613090515, "rewards/margins": 0.2847674489021301, "rewards/rejected": -0.48197034001350403, "step": 6785 }, { "epoch": 18.579055441478438, "grad_norm": 11.082525253295898, "learning_rate": 7.041095890410959e-08, "log_odds_chosen": 0.9019473791122437, "log_odds_ratio": -0.7968220710754395, "logits/chosen": 0.9537838697433472, "logits/rejected": 0.9800757169723511, "logps/chosen": -2.550661563873291, "logps/rejected": -3.254016160964966, "loss": 0.647, "nll_loss": 0.5673115253448486, "rewards/accuracies": 0.625, "rewards/chosen": -0.2550661861896515, "rewards/margins": 0.07033544778823853, "rewards/rejected": -0.3254016041755676, "step": 6786 }, { "epoch": 18.581793292265573, "grad_norm": 3.762178659439087, "learning_rate": 7.027397260273971e-08, "log_odds_chosen": 3.3536036014556885, "log_odds_ratio": -0.1742658019065857, "logits/chosen": 1.2665209770202637, "logits/rejected": 1.2452419996261597, "logps/chosen": -2.1714255809783936, "logps/rejected": -5.342835426330566, "loss": 0.5014, "nll_loss": 0.4839288890361786, "rewards/accuracies": 1.0, "rewards/chosen": -0.21714255213737488, "rewards/margins": 0.31714099645614624, "rewards/rejected": -0.5342835783958435, "step": 6787 }, { "epoch": 18.584531143052704, "grad_norm": 3.780289649963379, "learning_rate": 7.013698630136985e-08, "log_odds_chosen": 2.8210248947143555, "log_odds_ratio": -0.1626242846250534, "logits/chosen": 1.1433751583099365, "logits/rejected": 1.035699725151062, "logps/chosen": -2.0061635971069336, "logps/rejected": -4.587916851043701, "loss": 0.5401, "nll_loss": 0.5238466262817383, "rewards/accuracies": 1.0, "rewards/chosen": -0.20061635971069336, "rewards/margins": 0.2581753134727478, "rewards/rejected": -0.45879167318344116, "step": 6788 }, { "epoch": 18.587268993839835, "grad_norm": 3.395395517349243, "learning_rate": 7e-08, "log_odds_chosen": 3.217155933380127, "log_odds_ratio": -0.16470026969909668, "logits/chosen": 1.2704927921295166, "logits/rejected": 1.2821060419082642, "logps/chosen": -1.3083884716033936, "logps/rejected": -4.273861408233643, "loss": 0.4577, "nll_loss": 0.44126802682876587, "rewards/accuracies": 1.0, "rewards/chosen": -0.13083884119987488, "rewards/margins": 0.2965472936630249, "rewards/rejected": -0.4273861348628998, "step": 6789 }, { "epoch": 18.590006844626966, "grad_norm": 3.9007747173309326, "learning_rate": 6.986301369863014e-08, "log_odds_chosen": 4.102423667907715, "log_odds_ratio": -0.17404785752296448, "logits/chosen": 1.0485788583755493, "logits/rejected": 0.9796338677406311, "logps/chosen": -1.4825401306152344, "logps/rejected": -5.344374179840088, "loss": 0.5716, "nll_loss": 0.5541663765907288, "rewards/accuracies": 1.0, "rewards/chosen": -0.14825400710105896, "rewards/margins": 0.3861834406852722, "rewards/rejected": -0.5344374179840088, "step": 6790 }, { "epoch": 18.5927446954141, "grad_norm": 3.680535078048706, "learning_rate": 6.972602739726027e-08, "log_odds_chosen": 2.1918740272521973, "log_odds_ratio": -0.21804098784923553, "logits/chosen": 1.1342084407806396, "logits/rejected": 1.0709309577941895, "logps/chosen": -1.406515121459961, "logps/rejected": -3.3833389282226562, "loss": 0.4228, "nll_loss": 0.40095385909080505, "rewards/accuracies": 1.0, "rewards/chosen": -0.14065152406692505, "rewards/margins": 0.19768236577510834, "rewards/rejected": -0.3383339047431946, "step": 6791 }, { "epoch": 18.595482546201232, "grad_norm": 3.7310848236083984, "learning_rate": 6.958904109589041e-08, "log_odds_chosen": 4.160253524780273, "log_odds_ratio": -0.3149385154247284, "logits/chosen": 1.087701439857483, "logits/rejected": 0.9989814162254333, "logps/chosen": -1.7413878440856934, "logps/rejected": -5.766898155212402, "loss": 0.5414, "nll_loss": 0.5099179148674011, "rewards/accuracies": 0.875, "rewards/chosen": -0.17413876950740814, "rewards/margins": 0.4025511145591736, "rewards/rejected": -0.5766898989677429, "step": 6792 }, { "epoch": 18.598220396988363, "grad_norm": 4.792390823364258, "learning_rate": 6.945205479452055e-08, "log_odds_chosen": 4.0156989097595215, "log_odds_ratio": -0.20130595564842224, "logits/chosen": 1.310414433479309, "logits/rejected": 1.385124683380127, "logps/chosen": -2.6130433082580566, "logps/rejected": -6.513366222381592, "loss": 0.6189, "nll_loss": 0.5987390875816345, "rewards/accuracies": 1.0, "rewards/chosen": -0.2613043189048767, "rewards/margins": 0.3900323212146759, "rewards/rejected": -0.6513366103172302, "step": 6793 }, { "epoch": 18.600958247775495, "grad_norm": 6.891894340515137, "learning_rate": 6.931506849315067e-08, "log_odds_chosen": 3.5732028484344482, "log_odds_ratio": -0.10566383600234985, "logits/chosen": 0.9545915722846985, "logits/rejected": 0.9742358922958374, "logps/chosen": -1.9392670392990112, "logps/rejected": -5.350326061248779, "loss": 0.716, "nll_loss": 0.7054097056388855, "rewards/accuracies": 1.0, "rewards/chosen": -0.19392669200897217, "rewards/margins": 0.3411059081554413, "rewards/rejected": -0.5350326299667358, "step": 6794 }, { "epoch": 18.60369609856263, "grad_norm": 4.439833164215088, "learning_rate": 6.917808219178081e-08, "log_odds_chosen": 3.0963492393493652, "log_odds_ratio": -0.18415775895118713, "logits/chosen": 1.1565978527069092, "logits/rejected": 1.1949758529663086, "logps/chosen": -1.9331632852554321, "logps/rejected": -4.83453369140625, "loss": 0.5669, "nll_loss": 0.5485312938690186, "rewards/accuracies": 1.0, "rewards/chosen": -0.19331634044647217, "rewards/margins": 0.29013702273368835, "rewards/rejected": -0.4834533631801605, "step": 6795 }, { "epoch": 18.60643394934976, "grad_norm": 3.466468334197998, "learning_rate": 6.904109589041096e-08, "log_odds_chosen": 3.1787729263305664, "log_odds_ratio": -0.10488906502723694, "logits/chosen": 1.1526939868927002, "logits/rejected": 1.1774678230285645, "logps/chosen": -2.0839531421661377, "logps/rejected": -5.090263366699219, "loss": 0.5359, "nll_loss": 0.5253679156303406, "rewards/accuracies": 1.0, "rewards/chosen": -0.20839530229568481, "rewards/margins": 0.300631046295166, "rewards/rejected": -0.5090263485908508, "step": 6796 }, { "epoch": 18.60917180013689, "grad_norm": 3.503042221069336, "learning_rate": 6.890410958904109e-08, "log_odds_chosen": 6.02691650390625, "log_odds_ratio": -0.047979556024074554, "logits/chosen": 1.1472342014312744, "logits/rejected": 1.2425637245178223, "logps/chosen": -1.9278037548065186, "logps/rejected": -7.546351432800293, "loss": 0.661, "nll_loss": 0.6561987400054932, "rewards/accuracies": 1.0, "rewards/chosen": -0.19278037548065186, "rewards/margins": 0.5618547201156616, "rewards/rejected": -0.7546350955963135, "step": 6797 }, { "epoch": 18.611909650924023, "grad_norm": 9.003030776977539, "learning_rate": 6.876712328767123e-08, "log_odds_chosen": 1.255000114440918, "log_odds_ratio": -0.5282196998596191, "logits/chosen": 0.9676891565322876, "logits/rejected": 0.9424188137054443, "logps/chosen": -2.0801806449890137, "logps/rejected": -3.206838369369507, "loss": 0.512, "nll_loss": 0.45917046070098877, "rewards/accuracies": 0.875, "rewards/chosen": -0.20801804959774017, "rewards/margins": 0.11266579478979111, "rewards/rejected": -0.3206838369369507, "step": 6798 }, { "epoch": 18.614647501711158, "grad_norm": 3.9304757118225098, "learning_rate": 6.863013698630137e-08, "log_odds_chosen": 3.4844188690185547, "log_odds_ratio": -0.12089187651872635, "logits/chosen": 1.0523358583450317, "logits/rejected": 1.0945249795913696, "logps/chosen": -1.6705632209777832, "logps/rejected": -4.869062423706055, "loss": 0.5828, "nll_loss": 0.5707341432571411, "rewards/accuracies": 1.0, "rewards/chosen": -0.16705632209777832, "rewards/margins": 0.3198499083518982, "rewards/rejected": -0.4869062602519989, "step": 6799 }, { "epoch": 18.61738535249829, "grad_norm": 3.7315220832824707, "learning_rate": 6.84931506849315e-08, "log_odds_chosen": 3.6679139137268066, "log_odds_ratio": -0.08850868046283722, "logits/chosen": 1.0072484016418457, "logits/rejected": 1.0149118900299072, "logps/chosen": -1.9947938919067383, "logps/rejected": -5.41343879699707, "loss": 0.6078, "nll_loss": 0.598940908908844, "rewards/accuracies": 1.0, "rewards/chosen": -0.1994793862104416, "rewards/margins": 0.34186452627182007, "rewards/rejected": -0.5413439273834229, "step": 6800 }, { "epoch": 18.62012320328542, "grad_norm": 3.4619040489196777, "learning_rate": 6.835616438356163e-08, "log_odds_chosen": 2.240705966949463, "log_odds_ratio": -0.18068605661392212, "logits/chosen": 1.158432126045227, "logits/rejected": 1.1761349439620972, "logps/chosen": -1.55440092086792, "logps/rejected": -3.571528434753418, "loss": 0.4502, "nll_loss": 0.4321210980415344, "rewards/accuracies": 1.0, "rewards/chosen": -0.155440092086792, "rewards/margins": 0.20171275734901428, "rewards/rejected": -0.3571528494358063, "step": 6801 }, { "epoch": 18.622861054072555, "grad_norm": 9.637399673461914, "learning_rate": 6.821917808219178e-08, "log_odds_chosen": 0.999815821647644, "log_odds_ratio": -0.5620440244674683, "logits/chosen": 1.0472501516342163, "logits/rejected": 1.0181074142456055, "logps/chosen": -2.6178011894226074, "logps/rejected": -3.466174602508545, "loss": 0.7269, "nll_loss": 0.6706691384315491, "rewards/accuracies": 0.75, "rewards/chosen": -0.26178014278411865, "rewards/margins": 0.08483735471963882, "rewards/rejected": -0.3466174602508545, "step": 6802 }, { "epoch": 18.625598904859686, "grad_norm": 3.884655237197876, "learning_rate": 6.808219178082192e-08, "log_odds_chosen": 4.988759994506836, "log_odds_ratio": -0.03998483717441559, "logits/chosen": 0.8713281154632568, "logits/rejected": 0.8277037143707275, "logps/chosen": -2.177971363067627, "logps/rejected": -6.981104373931885, "loss": 0.7688, "nll_loss": 0.7647892236709595, "rewards/accuracies": 1.0, "rewards/chosen": -0.2177971601486206, "rewards/margins": 0.4803132712841034, "rewards/rejected": -0.6981104016304016, "step": 6803 }, { "epoch": 18.628336755646817, "grad_norm": 4.817233562469482, "learning_rate": 6.794520547945205e-08, "log_odds_chosen": 4.301478385925293, "log_odds_ratio": -0.16711720824241638, "logits/chosen": 0.884118914604187, "logits/rejected": 0.8500539660453796, "logps/chosen": -2.600954532623291, "logps/rejected": -6.696990966796875, "loss": 0.6012, "nll_loss": 0.5845239162445068, "rewards/accuracies": 1.0, "rewards/chosen": -0.260095477104187, "rewards/margins": 0.40960365533828735, "rewards/rejected": -0.6696990728378296, "step": 6804 }, { "epoch": 18.63107460643395, "grad_norm": 3.9330661296844482, "learning_rate": 6.780821917808219e-08, "log_odds_chosen": 4.380179405212402, "log_odds_ratio": -0.1902804970741272, "logits/chosen": 0.9494511485099792, "logits/rejected": 0.9435478448867798, "logps/chosen": -1.7200148105621338, "logps/rejected": -5.944277763366699, "loss": 0.5677, "nll_loss": 0.5486859083175659, "rewards/accuracies": 1.0, "rewards/chosen": -0.17200146615505219, "rewards/margins": 0.42242631316185, "rewards/rejected": -0.594427764415741, "step": 6805 }, { "epoch": 18.633812457221083, "grad_norm": 4.835897445678711, "learning_rate": 6.767123287671233e-08, "log_odds_chosen": 1.2750434875488281, "log_odds_ratio": -0.4298965036869049, "logits/chosen": 1.0242995023727417, "logits/rejected": 0.953791618347168, "logps/chosen": -2.2251358032226562, "logps/rejected": -3.2731220722198486, "loss": 0.4793, "nll_loss": 0.4363090693950653, "rewards/accuracies": 0.75, "rewards/chosen": -0.2225135862827301, "rewards/margins": 0.10479864478111267, "rewards/rejected": -0.3273122310638428, "step": 6806 }, { "epoch": 18.636550308008214, "grad_norm": 5.692923069000244, "learning_rate": 6.753424657534245e-08, "log_odds_chosen": 2.2616772651672363, "log_odds_ratio": -0.33359232544898987, "logits/chosen": 1.0898858308792114, "logits/rejected": 1.0633068084716797, "logps/chosen": -1.8659050464630127, "logps/rejected": -3.9739484786987305, "loss": 0.6352, "nll_loss": 0.6018561124801636, "rewards/accuracies": 0.875, "rewards/chosen": -0.1865905225276947, "rewards/margins": 0.21080434322357178, "rewards/rejected": -0.3973948359489441, "step": 6807 }, { "epoch": 18.639288158795345, "grad_norm": 3.7877566814422607, "learning_rate": 6.73972602739726e-08, "log_odds_chosen": 3.9320335388183594, "log_odds_ratio": -0.15927304327487946, "logits/chosen": 0.9819549322128296, "logits/rejected": 0.8669609427452087, "logps/chosen": -1.5409457683563232, "logps/rejected": -5.256662845611572, "loss": 0.4691, "nll_loss": 0.45317429304122925, "rewards/accuracies": 1.0, "rewards/chosen": -0.15409457683563232, "rewards/margins": 0.37157168984413147, "rewards/rejected": -0.5256662964820862, "step": 6808 }, { "epoch": 18.642026009582477, "grad_norm": 4.492136001586914, "learning_rate": 6.726027397260274e-08, "log_odds_chosen": 2.6204652786254883, "log_odds_ratio": -0.22829516232013702, "logits/chosen": 0.8160138130187988, "logits/rejected": 0.7673341035842896, "logps/chosen": -1.7313084602355957, "logps/rejected": -4.146358489990234, "loss": 0.4521, "nll_loss": 0.42931345105171204, "rewards/accuracies": 0.875, "rewards/chosen": -0.1731308549642563, "rewards/margins": 0.24150501191616058, "rewards/rejected": -0.41463586688041687, "step": 6809 }, { "epoch": 18.64476386036961, "grad_norm": 3.7622344493865967, "learning_rate": 6.712328767123288e-08, "log_odds_chosen": 1.2925012111663818, "log_odds_ratio": -0.39192208647727966, "logits/chosen": 0.864925742149353, "logits/rejected": 0.8009563088417053, "logps/chosen": -1.7971670627593994, "logps/rejected": -2.956146240234375, "loss": 0.6456, "nll_loss": 0.6064265370368958, "rewards/accuracies": 0.875, "rewards/chosen": -0.17971670627593994, "rewards/margins": 0.11589791625738144, "rewards/rejected": -0.2956146001815796, "step": 6810 }, { "epoch": 18.647501711156742, "grad_norm": 3.8231494426727295, "learning_rate": 6.6986301369863e-08, "log_odds_chosen": 3.293200969696045, "log_odds_ratio": -0.22637030482292175, "logits/chosen": 1.1409759521484375, "logits/rejected": 1.173695683479309, "logps/chosen": -2.0821614265441895, "logps/rejected": -5.273736000061035, "loss": 0.6234, "nll_loss": 0.6007508039474487, "rewards/accuracies": 1.0, "rewards/chosen": -0.20821616053581238, "rewards/margins": 0.3191574215888977, "rewards/rejected": -0.5273735523223877, "step": 6811 }, { "epoch": 18.650239561943874, "grad_norm": 7.048490047454834, "learning_rate": 6.684931506849315e-08, "log_odds_chosen": 3.0119948387145996, "log_odds_ratio": -0.3460255563259125, "logits/chosen": 1.272026538848877, "logits/rejected": 1.2698527574539185, "logps/chosen": -1.7106938362121582, "logps/rejected": -4.477133750915527, "loss": 0.4761, "nll_loss": 0.4414478540420532, "rewards/accuracies": 0.875, "rewards/chosen": -0.17106939852237701, "rewards/margins": 0.2766440212726593, "rewards/rejected": -0.4477134048938751, "step": 6812 }, { "epoch": 18.652977412731005, "grad_norm": 3.9377822875976562, "learning_rate": 6.671232876712328e-08, "log_odds_chosen": 3.508749008178711, "log_odds_ratio": -0.11878635734319687, "logits/chosen": 1.2399561405181885, "logits/rejected": 1.2400504350662231, "logps/chosen": -2.080754280090332, "logps/rejected": -5.424423694610596, "loss": 0.589, "nll_loss": 0.5771357417106628, "rewards/accuracies": 1.0, "rewards/chosen": -0.20807543396949768, "rewards/margins": 0.33436694741249084, "rewards/rejected": -0.5424423813819885, "step": 6813 }, { "epoch": 18.65571526351814, "grad_norm": 3.7889440059661865, "learning_rate": 6.657534246575342e-08, "log_odds_chosen": 2.400052309036255, "log_odds_ratio": -0.244886577129364, "logits/chosen": 0.9097465872764587, "logits/rejected": 0.9979220628738403, "logps/chosen": -2.246213674545288, "logps/rejected": -4.561573028564453, "loss": 0.7051, "nll_loss": 0.6806255578994751, "rewards/accuracies": 0.875, "rewards/chosen": -0.22462138533592224, "rewards/margins": 0.2315359115600586, "rewards/rejected": -0.45615729689598083, "step": 6814 }, { "epoch": 18.65845311430527, "grad_norm": 3.4950687885284424, "learning_rate": 6.643835616438356e-08, "log_odds_chosen": 2.8213729858398438, "log_odds_ratio": -0.1583067923784256, "logits/chosen": 1.109449028968811, "logits/rejected": 1.0517375469207764, "logps/chosen": -1.6086413860321045, "logps/rejected": -4.103964805603027, "loss": 0.5097, "nll_loss": 0.49391722679138184, "rewards/accuracies": 1.0, "rewards/chosen": -0.16086414456367493, "rewards/margins": 0.2495322972536087, "rewards/rejected": -0.4103964567184448, "step": 6815 }, { "epoch": 18.661190965092402, "grad_norm": 3.829801082611084, "learning_rate": 6.63013698630137e-08, "log_odds_chosen": 5.31935977935791, "log_odds_ratio": -0.1578046977519989, "logits/chosen": 1.266082525253296, "logits/rejected": 1.3174924850463867, "logps/chosen": -2.9136385917663574, "logps/rejected": -8.17398452758789, "loss": 0.6123, "nll_loss": 0.596473753452301, "rewards/accuracies": 0.875, "rewards/chosen": -0.2913638651371002, "rewards/margins": 0.5260346531867981, "rewards/rejected": -0.8173985481262207, "step": 6816 }, { "epoch": 18.663928815879533, "grad_norm": 3.861645460128784, "learning_rate": 6.616438356164384e-08, "log_odds_chosen": 2.964611053466797, "log_odds_ratio": -0.16100451350212097, "logits/chosen": 1.4099695682525635, "logits/rejected": 1.4353402853012085, "logps/chosen": -2.2576916217803955, "logps/rejected": -5.001810550689697, "loss": 0.5194, "nll_loss": 0.5032806396484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.22576917707920074, "rewards/margins": 0.2744118869304657, "rewards/rejected": -0.5001810789108276, "step": 6817 }, { "epoch": 18.666666666666668, "grad_norm": 7.365693092346191, "learning_rate": 6.602739726027397e-08, "log_odds_chosen": 3.8654491901397705, "log_odds_ratio": -0.23045970499515533, "logits/chosen": 1.168789267539978, "logits/rejected": 1.208806037902832, "logps/chosen": -2.1130077838897705, "logps/rejected": -5.805901050567627, "loss": 0.535, "nll_loss": 0.5119229555130005, "rewards/accuracies": 0.875, "rewards/chosen": -0.211300790309906, "rewards/margins": 0.3692892789840698, "rewards/rejected": -0.5805900692939758, "step": 6818 }, { "epoch": 18.6694045174538, "grad_norm": 5.603839874267578, "learning_rate": 6.58904109589041e-08, "log_odds_chosen": 2.943754196166992, "log_odds_ratio": -0.2790524661540985, "logits/chosen": 1.2000843286514282, "logits/rejected": 1.211815595626831, "logps/chosen": -2.3473455905914307, "logps/rejected": -5.025687217712402, "loss": 0.5958, "nll_loss": 0.5679311156272888, "rewards/accuracies": 0.875, "rewards/chosen": -0.23473456501960754, "rewards/margins": 0.26783421635627747, "rewards/rejected": -0.502568781375885, "step": 6819 }, { "epoch": 18.67214236824093, "grad_norm": 3.663662910461426, "learning_rate": 6.575342465753424e-08, "log_odds_chosen": 2.7039806842803955, "log_odds_ratio": -0.15915939211845398, "logits/chosen": 1.380688190460205, "logits/rejected": 1.325886845588684, "logps/chosen": -1.6152957677841187, "logps/rejected": -4.124876976013184, "loss": 0.4864, "nll_loss": 0.47049680352211, "rewards/accuracies": 1.0, "rewards/chosen": -0.16152958571910858, "rewards/margins": 0.2509581446647644, "rewards/rejected": -0.4124877154827118, "step": 6820 }, { "epoch": 18.67488021902806, "grad_norm": 3.7886340618133545, "learning_rate": 6.561643835616438e-08, "log_odds_chosen": 1.1178724765777588, "log_odds_ratio": -0.381529301404953, "logits/chosen": 0.901044487953186, "logits/rejected": 0.9070007801055908, "logps/chosen": -1.5043680667877197, "logps/rejected": -2.473526954650879, "loss": 0.4764, "nll_loss": 0.43820685148239136, "rewards/accuracies": 0.875, "rewards/chosen": -0.15043680369853973, "rewards/margins": 0.09691589325666428, "rewards/rejected": -0.2473526895046234, "step": 6821 }, { "epoch": 18.677618069815196, "grad_norm": 3.873666524887085, "learning_rate": 6.547945205479452e-08, "log_odds_chosen": 3.0819571018218994, "log_odds_ratio": -0.1512136161327362, "logits/chosen": 1.3371344804763794, "logits/rejected": 1.3724393844604492, "logps/chosen": -2.140547037124634, "logps/rejected": -5.106813430786133, "loss": 0.5214, "nll_loss": 0.5062773823738098, "rewards/accuracies": 1.0, "rewards/chosen": -0.21405471861362457, "rewards/margins": 0.2966266870498657, "rewards/rejected": -0.5106813907623291, "step": 6822 }, { "epoch": 18.680355920602327, "grad_norm": 5.221455097198486, "learning_rate": 6.534246575342466e-08, "log_odds_chosen": 3.6802024841308594, "log_odds_ratio": -0.14932292699813843, "logits/chosen": 1.125010371208191, "logits/rejected": 1.0457520484924316, "logps/chosen": -1.887191891670227, "logps/rejected": -5.383621692657471, "loss": 0.5041, "nll_loss": 0.48913252353668213, "rewards/accuracies": 1.0, "rewards/chosen": -0.18871918320655823, "rewards/margins": 0.3496429920196533, "rewards/rejected": -0.5383622050285339, "step": 6823 }, { "epoch": 18.68309377138946, "grad_norm": 4.430304050445557, "learning_rate": 6.520547945205478e-08, "log_odds_chosen": 3.8468141555786133, "log_odds_ratio": -0.04503817483782768, "logits/chosen": 1.3880692720413208, "logits/rejected": 1.4703706502914429, "logps/chosen": -2.390105724334717, "logps/rejected": -6.130420684814453, "loss": 0.6228, "nll_loss": 0.618268609046936, "rewards/accuracies": 1.0, "rewards/chosen": -0.23901058733463287, "rewards/margins": 0.3740314841270447, "rewards/rejected": -0.6130421161651611, "step": 6824 }, { "epoch": 18.68583162217659, "grad_norm": 3.9849233627319336, "learning_rate": 6.506849315068492e-08, "log_odds_chosen": 2.041956901550293, "log_odds_ratio": -0.4601123631000519, "logits/chosen": 1.1518940925598145, "logits/rejected": 1.1419376134872437, "logps/chosen": -1.836162805557251, "logps/rejected": -3.8277828693389893, "loss": 0.5096, "nll_loss": 0.46355777978897095, "rewards/accuracies": 0.625, "rewards/chosen": -0.1836162805557251, "rewards/margins": 0.19916200637817383, "rewards/rejected": -0.3827782869338989, "step": 6825 }, { "epoch": 18.688569472963724, "grad_norm": 3.5136430263519287, "learning_rate": 6.493150684931506e-08, "log_odds_chosen": 3.157686710357666, "log_odds_ratio": -0.12169455736875534, "logits/chosen": 0.9082849621772766, "logits/rejected": 0.8849281072616577, "logps/chosen": -2.1944985389709473, "logps/rejected": -5.218149662017822, "loss": 0.5125, "nll_loss": 0.5003742575645447, "rewards/accuracies": 1.0, "rewards/chosen": -0.21944986283779144, "rewards/margins": 0.3023650646209717, "rewards/rejected": -0.5218149423599243, "step": 6826 }, { "epoch": 18.691307323750856, "grad_norm": 13.884379386901855, "learning_rate": 6.479452054794521e-08, "log_odds_chosen": 1.4447529315948486, "log_odds_ratio": -0.8485448956489563, "logits/chosen": 1.0696666240692139, "logits/rejected": 1.0094846487045288, "logps/chosen": -3.0812788009643555, "logps/rejected": -4.326498508453369, "loss": 0.5728, "nll_loss": 0.48795703053474426, "rewards/accuracies": 0.75, "rewards/chosen": -0.30812788009643555, "rewards/margins": 0.12452194839715958, "rewards/rejected": -0.4326498210430145, "step": 6827 }, { "epoch": 18.694045174537987, "grad_norm": 4.003813743591309, "learning_rate": 6.465753424657534e-08, "log_odds_chosen": 3.1829538345336914, "log_odds_ratio": -0.11409065872430801, "logits/chosen": 1.1302074193954468, "logits/rejected": 1.049839735031128, "logps/chosen": -2.1058595180511475, "logps/rejected": -5.157287120819092, "loss": 0.6155, "nll_loss": 0.6041030287742615, "rewards/accuracies": 1.0, "rewards/chosen": -0.21058595180511475, "rewards/margins": 0.3051428198814392, "rewards/rejected": -0.515728771686554, "step": 6828 }, { "epoch": 18.69678302532512, "grad_norm": 3.2098562717437744, "learning_rate": 6.452054794520548e-08, "log_odds_chosen": 4.378502368927002, "log_odds_ratio": -0.15332773327827454, "logits/chosen": 1.1053999662399292, "logits/rejected": 1.0442602634429932, "logps/chosen": -2.119811534881592, "logps/rejected": -6.3389387130737305, "loss": 0.5437, "nll_loss": 0.5283355712890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.2119811773300171, "rewards/margins": 0.4219127297401428, "rewards/rejected": -0.6338939070701599, "step": 6829 }, { "epoch": 18.699520876112253, "grad_norm": 3.768765687942505, "learning_rate": 6.438356164383562e-08, "log_odds_chosen": 3.3819262981414795, "log_odds_ratio": -0.18493972718715668, "logits/chosen": 0.8015354871749878, "logits/rejected": 0.7466049194335938, "logps/chosen": -1.9646966457366943, "logps/rejected": -5.1727471351623535, "loss": 0.5345, "nll_loss": 0.5160303711891174, "rewards/accuracies": 1.0, "rewards/chosen": -0.19646967947483063, "rewards/margins": 0.32080507278442383, "rewards/rejected": -0.5172747373580933, "step": 6830 }, { "epoch": 18.702258726899384, "grad_norm": 5.32232666015625, "learning_rate": 6.424657534246574e-08, "log_odds_chosen": 2.9332118034362793, "log_odds_ratio": -0.12479137629270554, "logits/chosen": 1.1083301305770874, "logits/rejected": 1.1033611297607422, "logps/chosen": -1.979239821434021, "logps/rejected": -4.641120910644531, "loss": 0.471, "nll_loss": 0.45851314067840576, "rewards/accuracies": 1.0, "rewards/chosen": -0.19792400300502777, "rewards/margins": 0.2661881446838379, "rewards/rejected": -0.46411213278770447, "step": 6831 }, { "epoch": 18.704996577686515, "grad_norm": 3.5105721950531006, "learning_rate": 6.410958904109588e-08, "log_odds_chosen": 3.8163273334503174, "log_odds_ratio": -0.17299829423427582, "logits/chosen": 1.0371299982070923, "logits/rejected": 0.9558394551277161, "logps/chosen": -1.2784829139709473, "logps/rejected": -4.791060447692871, "loss": 0.4593, "nll_loss": 0.4419674277305603, "rewards/accuracies": 1.0, "rewards/chosen": -0.127848282456398, "rewards/margins": 0.3512578010559082, "rewards/rejected": -0.479106068611145, "step": 6832 }, { "epoch": 18.70773442847365, "grad_norm": 3.540834903717041, "learning_rate": 6.397260273972603e-08, "log_odds_chosen": 4.488333702087402, "log_odds_ratio": -0.17565856873989105, "logits/chosen": 0.9328530430793762, "logits/rejected": 0.9157485365867615, "logps/chosen": -1.909806489944458, "logps/rejected": -6.232604026794434, "loss": 0.5883, "nll_loss": 0.5707632303237915, "rewards/accuracies": 0.875, "rewards/chosen": -0.19098064303398132, "rewards/margins": 0.43227970600128174, "rewards/rejected": -0.6232603788375854, "step": 6833 }, { "epoch": 18.71047227926078, "grad_norm": 3.3561182022094727, "learning_rate": 6.383561643835616e-08, "log_odds_chosen": 1.7876216173171997, "log_odds_ratio": -0.2393883466720581, "logits/chosen": 1.1001826524734497, "logits/rejected": 1.0948216915130615, "logps/chosen": -1.3483221530914307, "logps/rejected": -2.907407760620117, "loss": 0.4252, "nll_loss": 0.40127137303352356, "rewards/accuracies": 1.0, "rewards/chosen": -0.1348322331905365, "rewards/margins": 0.15590856969356537, "rewards/rejected": -0.2907407879829407, "step": 6834 }, { "epoch": 18.713210130047912, "grad_norm": 3.0028538703918457, "learning_rate": 6.36986301369863e-08, "log_odds_chosen": 3.4238479137420654, "log_odds_ratio": -0.11850416660308838, "logits/chosen": 1.2520525455474854, "logits/rejected": 1.219520092010498, "logps/chosen": -1.872123122215271, "logps/rejected": -5.085385322570801, "loss": 0.475, "nll_loss": 0.463168203830719, "rewards/accuracies": 1.0, "rewards/chosen": -0.18721231818199158, "rewards/margins": 0.32132622599601746, "rewards/rejected": -0.5085386037826538, "step": 6835 }, { "epoch": 18.715947980835043, "grad_norm": 3.8562870025634766, "learning_rate": 6.356164383561644e-08, "log_odds_chosen": 1.7485111951828003, "log_odds_ratio": -0.2234734296798706, "logits/chosen": 1.0390989780426025, "logits/rejected": 0.9618690609931946, "logps/chosen": -1.8529192209243774, "logps/rejected": -3.458559036254883, "loss": 0.5193, "nll_loss": 0.4969358444213867, "rewards/accuracies": 1.0, "rewards/chosen": -0.18529191613197327, "rewards/margins": 0.16056397557258606, "rewards/rejected": -0.3458558917045593, "step": 6836 }, { "epoch": 18.718685831622178, "grad_norm": 3.8091928958892822, "learning_rate": 6.342465753424658e-08, "log_odds_chosen": 2.456465721130371, "log_odds_ratio": -0.15729168057441711, "logits/chosen": 0.9644219279289246, "logits/rejected": 0.9218119978904724, "logps/chosen": -1.747554063796997, "logps/rejected": -4.0260701179504395, "loss": 0.4784, "nll_loss": 0.4626457691192627, "rewards/accuracies": 1.0, "rewards/chosen": -0.17475540935993195, "rewards/margins": 0.22785158455371857, "rewards/rejected": -0.4026070237159729, "step": 6837 }, { "epoch": 18.72142368240931, "grad_norm": 4.498661994934082, "learning_rate": 6.32876712328767e-08, "log_odds_chosen": 2.7175211906433105, "log_odds_ratio": -0.15713751316070557, "logits/chosen": 1.0962737798690796, "logits/rejected": 1.0901638269424438, "logps/chosen": -1.7952744960784912, "logps/rejected": -4.254508018493652, "loss": 0.5769, "nll_loss": 0.5611735582351685, "rewards/accuracies": 1.0, "rewards/chosen": -0.17952746152877808, "rewards/margins": 0.24592335522174835, "rewards/rejected": -0.42545080184936523, "step": 6838 }, { "epoch": 18.72416153319644, "grad_norm": 4.039248466491699, "learning_rate": 6.315068493150684e-08, "log_odds_chosen": 1.9204241037368774, "log_odds_ratio": -0.19621731340885162, "logits/chosen": 1.10825777053833, "logits/rejected": 1.0446701049804688, "logps/chosen": -2.038438081741333, "logps/rejected": -3.782214879989624, "loss": 0.5589, "nll_loss": 0.5393000841140747, "rewards/accuracies": 1.0, "rewards/chosen": -0.20384378731250763, "rewards/margins": 0.1743776947259903, "rewards/rejected": -0.3782215118408203, "step": 6839 }, { "epoch": 18.72689938398357, "grad_norm": 4.545644760131836, "learning_rate": 6.301369863013699e-08, "log_odds_chosen": 2.875361919403076, "log_odds_ratio": -0.22661304473876953, "logits/chosen": 0.961615800857544, "logits/rejected": 0.9797385931015015, "logps/chosen": -1.9540345668792725, "logps/rejected": -4.572315692901611, "loss": 0.6647, "nll_loss": 0.6420738697052002, "rewards/accuracies": 1.0, "rewards/chosen": -0.19540345668792725, "rewards/margins": 0.26182809472084045, "rewards/rejected": -0.4572315812110901, "step": 6840 }, { "epoch": 18.729637234770706, "grad_norm": 3.430860757827759, "learning_rate": 6.287671232876712e-08, "log_odds_chosen": 3.5306243896484375, "log_odds_ratio": -0.1461358368396759, "logits/chosen": 1.139866590499878, "logits/rejected": 1.1790632009506226, "logps/chosen": -2.0136196613311768, "logps/rejected": -5.408238410949707, "loss": 0.5079, "nll_loss": 0.4933280348777771, "rewards/accuracies": 1.0, "rewards/chosen": -0.20136196911334991, "rewards/margins": 0.3394618630409241, "rewards/rejected": -0.5408238768577576, "step": 6841 }, { "epoch": 18.732375085557837, "grad_norm": 4.042440891265869, "learning_rate": 6.273972602739726e-08, "log_odds_chosen": 3.6272923946380615, "log_odds_ratio": -0.14194563031196594, "logits/chosen": 1.1601769924163818, "logits/rejected": 1.102544903755188, "logps/chosen": -3.0684943199157715, "logps/rejected": -6.572332382202148, "loss": 0.5699, "nll_loss": 0.5557333827018738, "rewards/accuracies": 1.0, "rewards/chosen": -0.3068494200706482, "rewards/margins": 0.35038381814956665, "rewards/rejected": -0.6572332978248596, "step": 6842 }, { "epoch": 18.73511293634497, "grad_norm": 4.4092912673950195, "learning_rate": 6.26027397260274e-08, "log_odds_chosen": 3.6872897148132324, "log_odds_ratio": -0.1386268436908722, "logits/chosen": 1.2672923803329468, "logits/rejected": 1.2338745594024658, "logps/chosen": -2.175045967102051, "logps/rejected": -5.73764705657959, "loss": 0.6169, "nll_loss": 0.6030376553535461, "rewards/accuracies": 1.0, "rewards/chosen": -0.2175045907497406, "rewards/margins": 0.35626012086868286, "rewards/rejected": -0.5737647414207458, "step": 6843 }, { "epoch": 18.7378507871321, "grad_norm": 3.75978684425354, "learning_rate": 6.246575342465754e-08, "log_odds_chosen": 1.8256559371948242, "log_odds_ratio": -0.3474563956260681, "logits/chosen": 1.1962890625, "logits/rejected": 1.2075186967849731, "logps/chosen": -1.4207255840301514, "logps/rejected": -3.0251474380493164, "loss": 0.5174, "nll_loss": 0.482700377702713, "rewards/accuracies": 0.875, "rewards/chosen": -0.14207255840301514, "rewards/margins": 0.16044217348098755, "rewards/rejected": -0.3025147616863251, "step": 6844 }, { "epoch": 18.740588637919235, "grad_norm": 4.1589884757995605, "learning_rate": 6.232876712328767e-08, "log_odds_chosen": 2.696861743927002, "log_odds_ratio": -0.20929041504859924, "logits/chosen": 1.018874168395996, "logits/rejected": 0.8832510709762573, "logps/chosen": -1.594606637954712, "logps/rejected": -4.104181289672852, "loss": 0.4687, "nll_loss": 0.4477382004261017, "rewards/accuracies": 1.0, "rewards/chosen": -0.15946067869663239, "rewards/margins": 0.2509574890136719, "rewards/rejected": -0.41041815280914307, "step": 6845 }, { "epoch": 18.743326488706366, "grad_norm": 3.897125482559204, "learning_rate": 6.21917808219178e-08, "log_odds_chosen": 1.4659737348556519, "log_odds_ratio": -0.32775530219078064, "logits/chosen": 1.0821017026901245, "logits/rejected": 1.1025445461273193, "logps/chosen": -1.8513541221618652, "logps/rejected": -3.1758229732513428, "loss": 0.4522, "nll_loss": 0.41945070028305054, "rewards/accuracies": 0.875, "rewards/chosen": -0.18513540923595428, "rewards/margins": 0.13244690001010895, "rewards/rejected": -0.31758230924606323, "step": 6846 }, { "epoch": 18.746064339493497, "grad_norm": 3.5096805095672607, "learning_rate": 6.205479452054795e-08, "log_odds_chosen": 3.3423869609832764, "log_odds_ratio": -0.16368812322616577, "logits/chosen": 1.3170709609985352, "logits/rejected": 1.2671430110931396, "logps/chosen": -1.3962050676345825, "logps/rejected": -4.280340194702148, "loss": 0.4357, "nll_loss": 0.41931378841400146, "rewards/accuracies": 1.0, "rewards/chosen": -0.13962049782276154, "rewards/margins": 0.28841355443000793, "rewards/rejected": -0.42803409695625305, "step": 6847 }, { "epoch": 18.748802190280628, "grad_norm": 3.7839272022247314, "learning_rate": 6.191780821917808e-08, "log_odds_chosen": 1.6460402011871338, "log_odds_ratio": -0.329012006521225, "logits/chosen": 1.3780553340911865, "logits/rejected": 1.35652756690979, "logps/chosen": -2.1270954608917236, "logps/rejected": -3.692140817642212, "loss": 0.4795, "nll_loss": 0.44658729434013367, "rewards/accuracies": 0.875, "rewards/chosen": -0.21270953118801117, "rewards/margins": 0.1565045416355133, "rewards/rejected": -0.36921408772468567, "step": 6848 }, { "epoch": 18.751540041067763, "grad_norm": 3.427063226699829, "learning_rate": 6.178082191780822e-08, "log_odds_chosen": 2.725813388824463, "log_odds_ratio": -0.11283792555332184, "logits/chosen": 1.407824158668518, "logits/rejected": 1.417205572128296, "logps/chosen": -1.4113565683364868, "logps/rejected": -3.8652734756469727, "loss": 0.419, "nll_loss": 0.40768179297447205, "rewards/accuracies": 1.0, "rewards/chosen": -0.14113566279411316, "rewards/margins": 0.24539169669151306, "rewards/rejected": -0.3865273594856262, "step": 6849 }, { "epoch": 18.754277891854894, "grad_norm": 4.120833873748779, "learning_rate": 6.164383561643836e-08, "log_odds_chosen": 2.253821611404419, "log_odds_ratio": -0.19074656069278717, "logits/chosen": 1.0174161195755005, "logits/rejected": 1.1453056335449219, "logps/chosen": -2.5607047080993652, "logps/rejected": -4.692715167999268, "loss": 0.6896, "nll_loss": 0.6705058217048645, "rewards/accuracies": 1.0, "rewards/chosen": -0.25607046484947205, "rewards/margins": 0.213201105594635, "rewards/rejected": -0.46927154064178467, "step": 6850 }, { "epoch": 18.757015742642025, "grad_norm": 3.3933331966400146, "learning_rate": 6.15068493150685e-08, "log_odds_chosen": 3.4419355392456055, "log_odds_ratio": -0.16630057990550995, "logits/chosen": 0.9613495469093323, "logits/rejected": 1.0141170024871826, "logps/chosen": -1.6374173164367676, "logps/rejected": -4.849981307983398, "loss": 0.4981, "nll_loss": 0.4814774990081787, "rewards/accuracies": 1.0, "rewards/chosen": -0.16374173760414124, "rewards/margins": 0.3212563991546631, "rewards/rejected": -0.4849981665611267, "step": 6851 }, { "epoch": 18.75975359342916, "grad_norm": 3.6835944652557373, "learning_rate": 6.136986301369863e-08, "log_odds_chosen": 2.261936902999878, "log_odds_ratio": -0.2985246181488037, "logits/chosen": 1.0993741750717163, "logits/rejected": 1.1149961948394775, "logps/chosen": -1.9791357517242432, "logps/rejected": -4.12680721282959, "loss": 0.5488, "nll_loss": 0.5189265608787537, "rewards/accuracies": 1.0, "rewards/chosen": -0.19791357219219208, "rewards/margins": 0.21476712822914124, "rewards/rejected": -0.4126806855201721, "step": 6852 }, { "epoch": 18.76249144421629, "grad_norm": 3.5664172172546387, "learning_rate": 6.123287671232876e-08, "log_odds_chosen": 2.540411949157715, "log_odds_ratio": -0.18658822774887085, "logits/chosen": 1.014647126197815, "logits/rejected": 1.0399930477142334, "logps/chosen": -1.6624934673309326, "logps/rejected": -3.981710910797119, "loss": 0.63, "nll_loss": 0.6113213300704956, "rewards/accuracies": 1.0, "rewards/chosen": -0.1662493646144867, "rewards/margins": 0.2319217473268509, "rewards/rejected": -0.3981710970401764, "step": 6853 }, { "epoch": 18.765229295003422, "grad_norm": 3.8077235221862793, "learning_rate": 6.10958904109589e-08, "log_odds_chosen": 1.3712257146835327, "log_odds_ratio": -0.3251446485519409, "logits/chosen": 1.2922887802124023, "logits/rejected": 1.2964754104614258, "logps/chosen": -1.5234732627868652, "logps/rejected": -2.6936511993408203, "loss": 0.4237, "nll_loss": 0.3911975622177124, "rewards/accuracies": 1.0, "rewards/chosen": -0.15234734117984772, "rewards/margins": 0.11701776087284088, "rewards/rejected": -0.2693651020526886, "step": 6854 }, { "epoch": 18.767967145790553, "grad_norm": 10.396642684936523, "learning_rate": 6.095890410958904e-08, "log_odds_chosen": 3.1055421829223633, "log_odds_ratio": -0.4197098910808563, "logits/chosen": 1.182668924331665, "logits/rejected": 1.2360996007919312, "logps/chosen": -2.210697650909424, "logps/rejected": -5.163928985595703, "loss": 0.5767, "nll_loss": 0.5347128510475159, "rewards/accuracies": 0.75, "rewards/chosen": -0.22106978297233582, "rewards/margins": 0.29532313346862793, "rewards/rejected": -0.5163928866386414, "step": 6855 }, { "epoch": 18.770704996577688, "grad_norm": 4.059891223907471, "learning_rate": 6.082191780821917e-08, "log_odds_chosen": 1.1743077039718628, "log_odds_ratio": -0.3468707799911499, "logits/chosen": 1.125150442123413, "logits/rejected": 1.0637588500976562, "logps/chosen": -1.6049286127090454, "logps/rejected": -2.586174488067627, "loss": 0.4438, "nll_loss": 0.4090760350227356, "rewards/accuracies": 0.875, "rewards/chosen": -0.16049285233020782, "rewards/margins": 0.09812459349632263, "rewards/rejected": -0.25861743092536926, "step": 6856 }, { "epoch": 18.77344284736482, "grad_norm": 3.9376306533813477, "learning_rate": 6.068493150684931e-08, "log_odds_chosen": 3.4285969734191895, "log_odds_ratio": -0.20477604866027832, "logits/chosen": 1.1675931215286255, "logits/rejected": 1.2142988443374634, "logps/chosen": -2.1826276779174805, "logps/rejected": -5.496162414550781, "loss": 0.6084, "nll_loss": 0.5879188776016235, "rewards/accuracies": 1.0, "rewards/chosen": -0.21826276183128357, "rewards/margins": 0.33135348558425903, "rewards/rejected": -0.549616277217865, "step": 6857 }, { "epoch": 18.77618069815195, "grad_norm": 3.304656505584717, "learning_rate": 6.054794520547945e-08, "log_odds_chosen": 3.387451171875, "log_odds_ratio": -0.14984801411628723, "logits/chosen": 1.1584845781326294, "logits/rejected": 1.1638660430908203, "logps/chosen": -1.575587272644043, "logps/rejected": -4.776484489440918, "loss": 0.5238, "nll_loss": 0.5088458061218262, "rewards/accuracies": 1.0, "rewards/chosen": -0.15755872428417206, "rewards/margins": 0.32008975744247437, "rewards/rejected": -0.4776484966278076, "step": 6858 }, { "epoch": 18.77891854893908, "grad_norm": 10.102210998535156, "learning_rate": 6.041095890410958e-08, "log_odds_chosen": 1.9332197904586792, "log_odds_ratio": -0.38355961441993713, "logits/chosen": 1.2307684421539307, "logits/rejected": 1.2178680896759033, "logps/chosen": -3.2227096557617188, "logps/rejected": -5.057852268218994, "loss": 0.609, "nll_loss": 0.5706205368041992, "rewards/accuracies": 0.75, "rewards/chosen": -0.3222709894180298, "rewards/margins": 0.18351425230503082, "rewards/rejected": -0.5057852268218994, "step": 6859 }, { "epoch": 18.781656399726216, "grad_norm": 3.6286768913269043, "learning_rate": 6.027397260273973e-08, "log_odds_chosen": 2.773343086242676, "log_odds_ratio": -0.2571498453617096, "logits/chosen": 1.2007498741149902, "logits/rejected": 1.2032442092895508, "logps/chosen": -1.5941481590270996, "logps/rejected": -4.201987266540527, "loss": 0.5029, "nll_loss": 0.47717195749282837, "rewards/accuracies": 0.875, "rewards/chosen": -0.15941482782363892, "rewards/margins": 0.2607838809490204, "rewards/rejected": -0.4201987087726593, "step": 6860 }, { "epoch": 18.784394250513348, "grad_norm": 4.4396514892578125, "learning_rate": 6.013698630136986e-08, "log_odds_chosen": 2.4294211864471436, "log_odds_ratio": -0.16652336716651917, "logits/chosen": 1.1939793825149536, "logits/rejected": 1.215030312538147, "logps/chosen": -1.9708163738250732, "logps/rejected": -4.252249717712402, "loss": 0.5267, "nll_loss": 0.5100932121276855, "rewards/accuracies": 1.0, "rewards/chosen": -0.19708165526390076, "rewards/margins": 0.2281433343887329, "rewards/rejected": -0.4252249598503113, "step": 6861 }, { "epoch": 18.78713210130048, "grad_norm": 4.094890594482422, "learning_rate": 6e-08, "log_odds_chosen": 2.2506840229034424, "log_odds_ratio": -0.25134408473968506, "logits/chosen": 0.9306246042251587, "logits/rejected": 0.8116295337677002, "logps/chosen": -1.6595033407211304, "logps/rejected": -3.7539010047912598, "loss": 0.6046, "nll_loss": 0.5794344544410706, "rewards/accuracies": 1.0, "rewards/chosen": -0.16595032811164856, "rewards/margins": 0.20943976938724518, "rewards/rejected": -0.37539008259773254, "step": 6862 }, { "epoch": 18.78986995208761, "grad_norm": 4.0398640632629395, "learning_rate": 5.986301369863013e-08, "log_odds_chosen": 2.4872262477874756, "log_odds_ratio": -0.2197360396385193, "logits/chosen": 1.0677496194839478, "logits/rejected": 1.0608364343643188, "logps/chosen": -1.435025691986084, "logps/rejected": -3.683434009552002, "loss": 0.637, "nll_loss": 0.6150019764900208, "rewards/accuracies": 1.0, "rewards/chosen": -0.14350256323814392, "rewards/margins": 0.22484084963798523, "rewards/rejected": -0.36834341287612915, "step": 6863 }, { "epoch": 18.792607802874745, "grad_norm": 3.609121322631836, "learning_rate": 5.972602739726027e-08, "log_odds_chosen": 2.518568277359009, "log_odds_ratio": -0.2694656252861023, "logits/chosen": 1.4792717695236206, "logits/rejected": 1.4309594631195068, "logps/chosen": -1.4805241823196411, "logps/rejected": -3.645235061645508, "loss": 0.4252, "nll_loss": 0.39828360080718994, "rewards/accuracies": 0.875, "rewards/chosen": -0.1480524241924286, "rewards/margins": 0.21647104620933533, "rewards/rejected": -0.3645234704017639, "step": 6864 }, { "epoch": 18.795345653661876, "grad_norm": 3.997182846069336, "learning_rate": 5.9589041095890405e-08, "log_odds_chosen": 3.1905155181884766, "log_odds_ratio": -0.12877872586250305, "logits/chosen": 0.8531875610351562, "logits/rejected": 0.8185877203941345, "logps/chosen": -2.0093131065368652, "logps/rejected": -5.0390801429748535, "loss": 0.538, "nll_loss": 0.5251649022102356, "rewards/accuracies": 1.0, "rewards/chosen": -0.20093132555484772, "rewards/margins": 0.30297672748565674, "rewards/rejected": -0.5039080381393433, "step": 6865 }, { "epoch": 18.798083504449007, "grad_norm": 4.011295795440674, "learning_rate": 5.945205479452055e-08, "log_odds_chosen": 1.5049389600753784, "log_odds_ratio": -0.29256948828697205, "logits/chosen": 0.9040123224258423, "logits/rejected": 0.9378533363342285, "logps/chosen": -2.0984816551208496, "logps/rejected": -3.4830901622772217, "loss": 0.5699, "nll_loss": 0.5406477451324463, "rewards/accuracies": 0.875, "rewards/chosen": -0.20984818041324615, "rewards/margins": 0.13846084475517273, "rewards/rejected": -0.3483090400695801, "step": 6866 }, { "epoch": 18.80082135523614, "grad_norm": 4.805916786193848, "learning_rate": 5.931506849315068e-08, "log_odds_chosen": 2.99902606010437, "log_odds_ratio": -0.18942883610725403, "logits/chosen": 1.0131287574768066, "logits/rejected": 0.9389486312866211, "logps/chosen": -1.8152596950531006, "logps/rejected": -4.566791534423828, "loss": 0.5565, "nll_loss": 0.537597119808197, "rewards/accuracies": 1.0, "rewards/chosen": -0.18152597546577454, "rewards/margins": 0.27515310049057007, "rewards/rejected": -0.456679105758667, "step": 6867 }, { "epoch": 18.803559206023273, "grad_norm": 3.557187557220459, "learning_rate": 5.9178082191780814e-08, "log_odds_chosen": 3.544887065887451, "log_odds_ratio": -0.22033056616783142, "logits/chosen": 1.2548887729644775, "logits/rejected": 1.2460620403289795, "logps/chosen": -1.9638710021972656, "logps/rejected": -5.203567981719971, "loss": 0.506, "nll_loss": 0.48393499851226807, "rewards/accuracies": 1.0, "rewards/chosen": -0.19638711214065552, "rewards/margins": 0.32396969199180603, "rewards/rejected": -0.5203567743301392, "step": 6868 }, { "epoch": 18.806297056810404, "grad_norm": 3.5161900520324707, "learning_rate": 5.904109589041096e-08, "log_odds_chosen": 5.0095953941345215, "log_odds_ratio": -0.1566048562526703, "logits/chosen": 1.239185094833374, "logits/rejected": 1.20561683177948, "logps/chosen": -1.9686667919158936, "logps/rejected": -6.825273036956787, "loss": 0.5437, "nll_loss": 0.528089165687561, "rewards/accuracies": 1.0, "rewards/chosen": -0.1968666911125183, "rewards/margins": 0.4856606125831604, "rewards/rejected": -0.6825273036956787, "step": 6869 }, { "epoch": 18.809034907597535, "grad_norm": 13.604597091674805, "learning_rate": 5.890410958904109e-08, "log_odds_chosen": 0.5886244773864746, "log_odds_ratio": -0.871453583240509, "logits/chosen": 1.0060310363769531, "logits/rejected": 0.9879533052444458, "logps/chosen": -2.293928384780884, "logps/rejected": -2.7178730964660645, "loss": 0.5501, "nll_loss": 0.46297526359558105, "rewards/accuracies": 0.75, "rewards/chosen": -0.229392871260643, "rewards/margins": 0.042394451797008514, "rewards/rejected": -0.2717873156070709, "step": 6870 }, { "epoch": 18.811772758384667, "grad_norm": 3.5800082683563232, "learning_rate": 5.876712328767123e-08, "log_odds_chosen": 2.349736213684082, "log_odds_ratio": -0.18027806282043457, "logits/chosen": 0.9730571508407593, "logits/rejected": 0.9925824403762817, "logps/chosen": -1.7876882553100586, "logps/rejected": -3.962124824523926, "loss": 0.4324, "nll_loss": 0.41438615322113037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1787688136100769, "rewards/margins": 0.21744365990161896, "rewards/rejected": -0.39621248841285706, "step": 6871 }, { "epoch": 18.8145106091718, "grad_norm": 3.6269309520721436, "learning_rate": 5.863013698630137e-08, "log_odds_chosen": 3.111006259918213, "log_odds_ratio": -0.1186758428812027, "logits/chosen": 1.3295297622680664, "logits/rejected": 1.3689993619918823, "logps/chosen": -2.0964536666870117, "logps/rejected": -5.061389923095703, "loss": 0.5027, "nll_loss": 0.49084651470184326, "rewards/accuracies": 1.0, "rewards/chosen": -0.2096453607082367, "rewards/margins": 0.29649364948272705, "rewards/rejected": -0.5061390399932861, "step": 6872 }, { "epoch": 18.817248459958932, "grad_norm": 3.8348939418792725, "learning_rate": 5.849315068493151e-08, "log_odds_chosen": 3.3666036128997803, "log_odds_ratio": -0.20152725279331207, "logits/chosen": 1.0248374938964844, "logits/rejected": 1.0109870433807373, "logps/chosen": -1.988110899925232, "logps/rejected": -5.1153130531311035, "loss": 0.4983, "nll_loss": 0.47810566425323486, "rewards/accuracies": 0.875, "rewards/chosen": -0.19881108403205872, "rewards/margins": 0.31272023916244507, "rewards/rejected": -0.5115313529968262, "step": 6873 }, { "epoch": 18.819986310746064, "grad_norm": 3.427408456802368, "learning_rate": 5.835616438356164e-08, "log_odds_chosen": 1.747631311416626, "log_odds_ratio": -0.23980672657489777, "logits/chosen": 1.1867223978042603, "logits/rejected": 1.1271902322769165, "logps/chosen": -1.4919523000717163, "logps/rejected": -3.0226097106933594, "loss": 0.3986, "nll_loss": 0.37459152936935425, "rewards/accuracies": 1.0, "rewards/chosen": -0.14919522404670715, "rewards/margins": 0.1530657559633255, "rewards/rejected": -0.30226099491119385, "step": 6874 }, { "epoch": 18.822724161533195, "grad_norm": 4.44407320022583, "learning_rate": 5.821917808219177e-08, "log_odds_chosen": 3.5467941761016846, "log_odds_ratio": -0.33764705061912537, "logits/chosen": 1.0363036394119263, "logits/rejected": 1.0997138023376465, "logps/chosen": -1.8475675582885742, "logps/rejected": -5.107248306274414, "loss": 0.6596, "nll_loss": 0.6258299350738525, "rewards/accuracies": 0.875, "rewards/chosen": -0.18475675582885742, "rewards/margins": 0.32596808671951294, "rewards/rejected": -0.5107248425483704, "step": 6875 }, { "epoch": 18.82546201232033, "grad_norm": 3.8506128787994385, "learning_rate": 5.808219178082192e-08, "log_odds_chosen": 2.4545340538024902, "log_odds_ratio": -0.1957191824913025, "logits/chosen": 1.2413477897644043, "logits/rejected": 1.2269576787948608, "logps/chosen": -1.8827311992645264, "logps/rejected": -3.949596881866455, "loss": 0.5055, "nll_loss": 0.4859035611152649, "rewards/accuracies": 1.0, "rewards/chosen": -0.1882731318473816, "rewards/margins": 0.2066865712404251, "rewards/rejected": -0.3949596881866455, "step": 6876 }, { "epoch": 18.82819986310746, "grad_norm": 3.4865057468414307, "learning_rate": 5.794520547945205e-08, "log_odds_chosen": 2.9676434993743896, "log_odds_ratio": -0.1260385364294052, "logits/chosen": 1.16887366771698, "logits/rejected": 1.2184427976608276, "logps/chosen": -1.8958792686462402, "logps/rejected": -4.673784255981445, "loss": 0.4855, "nll_loss": 0.4728976786136627, "rewards/accuracies": 0.875, "rewards/chosen": -0.18958792090415955, "rewards/margins": 0.27779051661491394, "rewards/rejected": -0.4673784375190735, "step": 6877 }, { "epoch": 18.830937713894592, "grad_norm": 3.882485866546631, "learning_rate": 5.780821917808218e-08, "log_odds_chosen": 2.9763193130493164, "log_odds_ratio": -0.26791444420814514, "logits/chosen": 0.9497845768928528, "logits/rejected": 0.9834786653518677, "logps/chosen": -1.9254953861236572, "logps/rejected": -4.8266377449035645, "loss": 0.5749, "nll_loss": 0.5481346845626831, "rewards/accuracies": 0.875, "rewards/chosen": -0.19254955649375916, "rewards/margins": 0.29011422395706177, "rewards/rejected": -0.4826637804508209, "step": 6878 }, { "epoch": 18.833675564681727, "grad_norm": 4.75388765335083, "learning_rate": 5.767123287671233e-08, "log_odds_chosen": 2.9014291763305664, "log_odds_ratio": -0.12521781027317047, "logits/chosen": 1.3431987762451172, "logits/rejected": 1.4058805704116821, "logps/chosen": -2.251394271850586, "logps/rejected": -4.796996593475342, "loss": 0.5237, "nll_loss": 0.5111459493637085, "rewards/accuracies": 1.0, "rewards/chosen": -0.22513940930366516, "rewards/margins": 0.25456032156944275, "rewards/rejected": -0.4796997308731079, "step": 6879 }, { "epoch": 18.836413415468858, "grad_norm": 5.5271992683410645, "learning_rate": 5.753424657534246e-08, "log_odds_chosen": 4.283120632171631, "log_odds_ratio": -0.42176252603530884, "logits/chosen": 0.9194394946098328, "logits/rejected": 0.8978719711303711, "logps/chosen": -2.0319008827209473, "logps/rejected": -6.211535453796387, "loss": 0.6249, "nll_loss": 0.5826878547668457, "rewards/accuracies": 0.875, "rewards/chosen": -0.20319008827209473, "rewards/margins": 0.4179634153842926, "rewards/rejected": -0.6211534738540649, "step": 6880 }, { "epoch": 18.83915126625599, "grad_norm": 4.41161584854126, "learning_rate": 5.73972602739726e-08, "log_odds_chosen": 2.841891288757324, "log_odds_ratio": -0.16390708088874817, "logits/chosen": 0.9502036571502686, "logits/rejected": 0.7981292009353638, "logps/chosen": -1.099397897720337, "logps/rejected": -3.5437188148498535, "loss": 0.5676, "nll_loss": 0.5512010455131531, "rewards/accuracies": 1.0, "rewards/chosen": -0.10993978381156921, "rewards/margins": 0.24443209171295166, "rewards/rejected": -0.3543718457221985, "step": 6881 }, { "epoch": 18.84188911704312, "grad_norm": 3.7314586639404297, "learning_rate": 5.726027397260274e-08, "log_odds_chosen": 5.059234619140625, "log_odds_ratio": -0.029612069949507713, "logits/chosen": 1.299099087715149, "logits/rejected": 1.352156639099121, "logps/chosen": -1.5101109743118286, "logps/rejected": -6.233161926269531, "loss": 0.445, "nll_loss": 0.4420775771141052, "rewards/accuracies": 1.0, "rewards/chosen": -0.15101110935211182, "rewards/margins": 0.4723051190376282, "rewards/rejected": -0.6233161687850952, "step": 6882 }, { "epoch": 18.844626967830255, "grad_norm": 6.529886722564697, "learning_rate": 5.712328767123288e-08, "log_odds_chosen": 1.3157588243484497, "log_odds_ratio": -0.3302325904369354, "logits/chosen": 1.0457262992858887, "logits/rejected": 1.0992993116378784, "logps/chosen": -3.8126754760742188, "logps/rejected": -4.999118804931641, "loss": 0.7981, "nll_loss": 0.765078604221344, "rewards/accuracies": 0.875, "rewards/chosen": -0.3812675476074219, "rewards/margins": 0.1186443567276001, "rewards/rejected": -0.499911904335022, "step": 6883 }, { "epoch": 18.847364818617386, "grad_norm": 3.855332374572754, "learning_rate": 5.698630136986301e-08, "log_odds_chosen": 3.2501723766326904, "log_odds_ratio": -0.1451481580734253, "logits/chosen": 1.0746896266937256, "logits/rejected": 1.0761253833770752, "logps/chosen": -1.5536885261535645, "logps/rejected": -4.588296413421631, "loss": 0.4808, "nll_loss": 0.4663287103176117, "rewards/accuracies": 1.0, "rewards/chosen": -0.155368834733963, "rewards/margins": 0.3034608066082001, "rewards/rejected": -0.4588296413421631, "step": 6884 }, { "epoch": 18.850102669404517, "grad_norm": 3.115635633468628, "learning_rate": 5.684931506849315e-08, "log_odds_chosen": 3.192946434020996, "log_odds_ratio": -0.11257435381412506, "logits/chosen": 1.2737177610397339, "logits/rejected": 1.194416880607605, "logps/chosen": -1.3985768556594849, "logps/rejected": -4.275273323059082, "loss": 0.4834, "nll_loss": 0.47210797667503357, "rewards/accuracies": 1.0, "rewards/chosen": -0.139857679605484, "rewards/margins": 0.2876696288585663, "rewards/rejected": -0.4275273382663727, "step": 6885 }, { "epoch": 18.85284052019165, "grad_norm": 3.677302122116089, "learning_rate": 5.671232876712329e-08, "log_odds_chosen": 2.6781911849975586, "log_odds_ratio": -0.19879892468452454, "logits/chosen": 1.2812899351119995, "logits/rejected": 1.3037211894989014, "logps/chosen": -2.1050219535827637, "logps/rejected": -4.651259422302246, "loss": 0.5697, "nll_loss": 0.5498272180557251, "rewards/accuracies": 1.0, "rewards/chosen": -0.21050220727920532, "rewards/margins": 0.2546237111091614, "rewards/rejected": -0.4651259183883667, "step": 6886 }, { "epoch": 18.855578370978783, "grad_norm": 3.496650457382202, "learning_rate": 5.657534246575342e-08, "log_odds_chosen": 2.4234399795532227, "log_odds_ratio": -0.1773439347743988, "logits/chosen": 0.984984815120697, "logits/rejected": 0.9572906494140625, "logps/chosen": -1.9657537937164307, "logps/rejected": -4.195336818695068, "loss": 0.5502, "nll_loss": 0.5324456095695496, "rewards/accuracies": 1.0, "rewards/chosen": -0.19657538831233978, "rewards/margins": 0.22295832633972168, "rewards/rejected": -0.41953369975090027, "step": 6887 }, { "epoch": 18.858316221765914, "grad_norm": 3.8150994777679443, "learning_rate": 5.6438356164383565e-08, "log_odds_chosen": 3.0324594974517822, "log_odds_ratio": -0.1198904886841774, "logits/chosen": 1.3239384889602661, "logits/rejected": 1.2815805673599243, "logps/chosen": -1.7915773391723633, "logps/rejected": -4.5620856285095215, "loss": 0.4221, "nll_loss": 0.41008180379867554, "rewards/accuracies": 1.0, "rewards/chosen": -0.17915776371955872, "rewards/margins": 0.27705082297325134, "rewards/rejected": -0.45620855689048767, "step": 6888 }, { "epoch": 18.861054072553046, "grad_norm": 3.5421290397644043, "learning_rate": 5.63013698630137e-08, "log_odds_chosen": 2.7274622917175293, "log_odds_ratio": -0.21957196295261383, "logits/chosen": 1.0915600061416626, "logits/rejected": 1.1309268474578857, "logps/chosen": -2.429255485534668, "logps/rejected": -5.037055015563965, "loss": 0.6841, "nll_loss": 0.6620964407920837, "rewards/accuracies": 0.875, "rewards/chosen": -0.24292553961277008, "rewards/margins": 0.2607799768447876, "rewards/rejected": -0.5037055015563965, "step": 6889 }, { "epoch": 18.863791923340177, "grad_norm": 3.3239290714263916, "learning_rate": 5.616438356164383e-08, "log_odds_chosen": 4.707417011260986, "log_odds_ratio": -0.14662113785743713, "logits/chosen": 0.9423812627792358, "logits/rejected": 0.9746114015579224, "logps/chosen": -1.5830914974212646, "logps/rejected": -5.970705986022949, "loss": 0.5112, "nll_loss": 0.4965430498123169, "rewards/accuracies": 1.0, "rewards/chosen": -0.15830914676189423, "rewards/margins": 0.43876147270202637, "rewards/rejected": -0.5970706343650818, "step": 6890 }, { "epoch": 18.86652977412731, "grad_norm": 4.05747127532959, "learning_rate": 5.6027397260273975e-08, "log_odds_chosen": 2.964510202407837, "log_odds_ratio": -0.23691536486148834, "logits/chosen": 1.0454736948013306, "logits/rejected": 0.9102245569229126, "logps/chosen": -2.0712242126464844, "logps/rejected": -4.870619773864746, "loss": 0.6446, "nll_loss": 0.6209459900856018, "rewards/accuracies": 0.875, "rewards/chosen": -0.20712243020534515, "rewards/margins": 0.27993959188461304, "rewards/rejected": -0.487062007188797, "step": 6891 }, { "epoch": 18.869267624914443, "grad_norm": 4.279276371002197, "learning_rate": 5.589041095890411e-08, "log_odds_chosen": 3.725576400756836, "log_odds_ratio": -0.23369456827640533, "logits/chosen": 1.0835751295089722, "logits/rejected": 1.071379542350769, "logps/chosen": -1.839239239692688, "logps/rejected": -5.3864922523498535, "loss": 0.4956, "nll_loss": 0.47226303815841675, "rewards/accuracies": 0.875, "rewards/chosen": -0.18392391502857208, "rewards/margins": 0.35472530126571655, "rewards/rejected": -0.5386492013931274, "step": 6892 }, { "epoch": 18.872005475701574, "grad_norm": 4.111060619354248, "learning_rate": 5.5753424657534246e-08, "log_odds_chosen": 3.008697986602783, "log_odds_ratio": -0.10050719976425171, "logits/chosen": 1.058208703994751, "logits/rejected": 1.0850319862365723, "logps/chosen": -2.6301121711730957, "logps/rejected": -5.508846282958984, "loss": 0.5809, "nll_loss": 0.5708147287368774, "rewards/accuracies": 1.0, "rewards/chosen": -0.2630111873149872, "rewards/margins": 0.28787344694137573, "rewards/rejected": -0.5508846044540405, "step": 6893 }, { "epoch": 18.874743326488705, "grad_norm": 3.142188310623169, "learning_rate": 5.5616438356164385e-08, "log_odds_chosen": 2.283726692199707, "log_odds_ratio": -0.18230029940605164, "logits/chosen": 0.9785928726196289, "logits/rejected": 0.9393336176872253, "logps/chosen": -2.10778546333313, "logps/rejected": -4.249309062957764, "loss": 0.524, "nll_loss": 0.5057944059371948, "rewards/accuracies": 1.0, "rewards/chosen": -0.21077856421470642, "rewards/margins": 0.21415235102176666, "rewards/rejected": -0.4249309003353119, "step": 6894 }, { "epoch": 18.87748117727584, "grad_norm": 3.641099691390991, "learning_rate": 5.547945205479452e-08, "log_odds_chosen": 4.630912780761719, "log_odds_ratio": -0.12560178339481354, "logits/chosen": 1.0351390838623047, "logits/rejected": 1.035940408706665, "logps/chosen": -2.067596197128296, "logps/rejected": -6.551618576049805, "loss": 0.605, "nll_loss": 0.5924166440963745, "rewards/accuracies": 1.0, "rewards/chosen": -0.20675963163375854, "rewards/margins": 0.4484022259712219, "rewards/rejected": -0.6551618576049805, "step": 6895 }, { "epoch": 18.88021902806297, "grad_norm": 4.059667110443115, "learning_rate": 5.5342465753424656e-08, "log_odds_chosen": 3.6992146968841553, "log_odds_ratio": -0.06492581963539124, "logits/chosen": 1.083446979522705, "logits/rejected": 1.1460800170898438, "logps/chosen": -2.4111928939819336, "logps/rejected": -5.96706485748291, "loss": 0.7576, "nll_loss": 0.7511050701141357, "rewards/accuracies": 1.0, "rewards/chosen": -0.24111929535865784, "rewards/margins": 0.3555871248245239, "rewards/rejected": -0.5967064499855042, "step": 6896 }, { "epoch": 18.882956878850102, "grad_norm": 3.5009255409240723, "learning_rate": 5.520547945205479e-08, "log_odds_chosen": 2.761186122894287, "log_odds_ratio": -0.13934804499149323, "logits/chosen": 1.187437653541565, "logits/rejected": 1.15193510055542, "logps/chosen": -1.5586538314819336, "logps/rejected": -4.088623523712158, "loss": 0.441, "nll_loss": 0.42709603905677795, "rewards/accuracies": 1.0, "rewards/chosen": -0.1558653861284256, "rewards/margins": 0.25299695134162903, "rewards/rejected": -0.4088623523712158, "step": 6897 }, { "epoch": 18.885694729637233, "grad_norm": 6.068607330322266, "learning_rate": 5.5068493150684933e-08, "log_odds_chosen": 1.8007538318634033, "log_odds_ratio": -0.34166020154953003, "logits/chosen": 1.0286582708358765, "logits/rejected": 0.9323252439498901, "logps/chosen": -1.7887663841247559, "logps/rejected": -3.3183701038360596, "loss": 0.4611, "nll_loss": 0.42697376012802124, "rewards/accuracies": 0.875, "rewards/chosen": -0.17887663841247559, "rewards/margins": 0.15296036005020142, "rewards/rejected": -0.331836998462677, "step": 6898 }, { "epoch": 18.888432580424368, "grad_norm": 8.087991714477539, "learning_rate": 5.4931506849315066e-08, "log_odds_chosen": 1.4675754308700562, "log_odds_ratio": -0.4610986113548279, "logits/chosen": 1.183497667312622, "logits/rejected": 1.1316884756088257, "logps/chosen": -2.2543911933898926, "logps/rejected": -3.58577299118042, "loss": 0.604, "nll_loss": 0.557876706123352, "rewards/accuracies": 0.75, "rewards/chosen": -0.2254391312599182, "rewards/margins": 0.13313817977905273, "rewards/rejected": -0.35857728123664856, "step": 6899 }, { "epoch": 18.8911704312115, "grad_norm": 3.5321173667907715, "learning_rate": 5.47945205479452e-08, "log_odds_chosen": 2.2941267490386963, "log_odds_ratio": -0.20087674260139465, "logits/chosen": 1.1079325675964355, "logits/rejected": 1.0970821380615234, "logps/chosen": -1.5322237014770508, "logps/rejected": -3.550471067428589, "loss": 0.476, "nll_loss": 0.4559531807899475, "rewards/accuracies": 1.0, "rewards/chosen": -0.15322236716747284, "rewards/margins": 0.20182475447654724, "rewards/rejected": -0.3550471365451813, "step": 6900 }, { "epoch": 18.89390828199863, "grad_norm": 3.5030951499938965, "learning_rate": 5.465753424657534e-08, "log_odds_chosen": 3.1130857467651367, "log_odds_ratio": -0.246931254863739, "logits/chosen": 1.3431090116500854, "logits/rejected": 1.3140767812728882, "logps/chosen": -1.6651108264923096, "logps/rejected": -4.561371326446533, "loss": 0.4308, "nll_loss": 0.4061238765716553, "rewards/accuracies": 1.0, "rewards/chosen": -0.16651108860969543, "rewards/margins": 0.2896260619163513, "rewards/rejected": -0.45613715052604675, "step": 6901 }, { "epoch": 18.89664613278576, "grad_norm": 4.531057357788086, "learning_rate": 5.4520547945205476e-08, "log_odds_chosen": 2.1410117149353027, "log_odds_ratio": -0.4170464873313904, "logits/chosen": 1.2953311204910278, "logits/rejected": 1.244394063949585, "logps/chosen": -2.059514284133911, "logps/rejected": -4.045541763305664, "loss": 0.5093, "nll_loss": 0.46756866574287415, "rewards/accuracies": 0.875, "rewards/chosen": -0.20595142245292664, "rewards/margins": 0.19860273599624634, "rewards/rejected": -0.40455418825149536, "step": 6902 }, { "epoch": 18.899383983572896, "grad_norm": 3.8151957988739014, "learning_rate": 5.4383561643835614e-08, "log_odds_chosen": 1.4739744663238525, "log_odds_ratio": -0.24651303887367249, "logits/chosen": 1.1605656147003174, "logits/rejected": 1.0905048847198486, "logps/chosen": -1.233119010925293, "logps/rejected": -2.4680144786834717, "loss": 0.3976, "nll_loss": 0.37294280529022217, "rewards/accuracies": 1.0, "rewards/chosen": -0.12331189215183258, "rewards/margins": 0.12348955124616623, "rewards/rejected": -0.2468014657497406, "step": 6903 }, { "epoch": 18.902121834360027, "grad_norm": 3.5050601959228516, "learning_rate": 5.424657534246575e-08, "log_odds_chosen": 4.723921775817871, "log_odds_ratio": -0.12257693707942963, "logits/chosen": 1.1752346754074097, "logits/rejected": 1.1103496551513672, "logps/chosen": -1.5698959827423096, "logps/rejected": -6.045386791229248, "loss": 0.5101, "nll_loss": 0.4978499412536621, "rewards/accuracies": 1.0, "rewards/chosen": -0.15698960423469543, "rewards/margins": 0.44754907488822937, "rewards/rejected": -0.6045386791229248, "step": 6904 }, { "epoch": 18.90485968514716, "grad_norm": 3.610319137573242, "learning_rate": 5.4109589041095885e-08, "log_odds_chosen": 3.0576488971710205, "log_odds_ratio": -0.1627313643693924, "logits/chosen": 1.1426782608032227, "logits/rejected": 1.0668883323669434, "logps/chosen": -1.8704168796539307, "logps/rejected": -4.725096225738525, "loss": 0.5252, "nll_loss": 0.5088868737220764, "rewards/accuracies": 0.875, "rewards/chosen": -0.18704168498516083, "rewards/margins": 0.2854679226875305, "rewards/rejected": -0.47250962257385254, "step": 6905 }, { "epoch": 18.907597535934293, "grad_norm": 4.190164566040039, "learning_rate": 5.3972602739726024e-08, "log_odds_chosen": 4.073308944702148, "log_odds_ratio": -0.18340308964252472, "logits/chosen": 1.1015371084213257, "logits/rejected": 1.1257766485214233, "logps/chosen": -1.6887519359588623, "logps/rejected": -5.550697326660156, "loss": 0.487, "nll_loss": 0.4686688780784607, "rewards/accuracies": 1.0, "rewards/chosen": -0.16887520253658295, "rewards/margins": 0.38619452714920044, "rewards/rejected": -0.5550697445869446, "step": 6906 }, { "epoch": 18.910335386721425, "grad_norm": 4.140987396240234, "learning_rate": 5.383561643835616e-08, "log_odds_chosen": 2.8155035972595215, "log_odds_ratio": -0.2859271168708801, "logits/chosen": 1.1064050197601318, "logits/rejected": 1.1027722358703613, "logps/chosen": -2.129206895828247, "logps/rejected": -4.687342166900635, "loss": 0.6105, "nll_loss": 0.5819186568260193, "rewards/accuracies": 0.875, "rewards/chosen": -0.21292069554328918, "rewards/margins": 0.2558135390281677, "rewards/rejected": -0.4687342345714569, "step": 6907 }, { "epoch": 18.913073237508556, "grad_norm": 5.908936023712158, "learning_rate": 5.36986301369863e-08, "log_odds_chosen": 1.5722830295562744, "log_odds_ratio": -0.3653249740600586, "logits/chosen": 1.1418309211730957, "logits/rejected": 1.1548742055892944, "logps/chosen": -1.927196979522705, "logps/rejected": -3.3239364624023438, "loss": 0.5064, "nll_loss": 0.46981894969940186, "rewards/accuracies": 0.75, "rewards/chosen": -0.1927196979522705, "rewards/margins": 0.13967393338680267, "rewards/rejected": -0.3323936462402344, "step": 6908 }, { "epoch": 18.915811088295687, "grad_norm": 3.720107316970825, "learning_rate": 5.3561643835616434e-08, "log_odds_chosen": 2.476853132247925, "log_odds_ratio": -0.20771372318267822, "logits/chosen": 0.8282681703567505, "logits/rejected": 0.8528313636779785, "logps/chosen": -1.5718388557434082, "logps/rejected": -3.7710561752319336, "loss": 0.534, "nll_loss": 0.5132471919059753, "rewards/accuracies": 1.0, "rewards/chosen": -0.15718388557434082, "rewards/margins": 0.21992173790931702, "rewards/rejected": -0.3771056532859802, "step": 6909 }, { "epoch": 18.91854893908282, "grad_norm": 3.462707757949829, "learning_rate": 5.342465753424657e-08, "log_odds_chosen": 2.914429187774658, "log_odds_ratio": -0.11481508612632751, "logits/chosen": 0.8767727613449097, "logits/rejected": 0.8154579401016235, "logps/chosen": -1.7412737607955933, "logps/rejected": -4.3962531089782715, "loss": 0.4868, "nll_loss": 0.47533732652664185, "rewards/accuracies": 1.0, "rewards/chosen": -0.17412737011909485, "rewards/margins": 0.2654978930950165, "rewards/rejected": -0.4396253228187561, "step": 6910 }, { "epoch": 18.921286789869953, "grad_norm": 3.267488718032837, "learning_rate": 5.328767123287671e-08, "log_odds_chosen": 3.0680794715881348, "log_odds_ratio": -0.12958164513111115, "logits/chosen": 0.975403904914856, "logits/rejected": 0.942572295665741, "logps/chosen": -1.0932281017303467, "logps/rejected": -3.745169162750244, "loss": 0.4223, "nll_loss": 0.4093533456325531, "rewards/accuracies": 1.0, "rewards/chosen": -0.10932280868291855, "rewards/margins": 0.2651941180229187, "rewards/rejected": -0.37451693415641785, "step": 6911 }, { "epoch": 18.924024640657084, "grad_norm": 7.342456340789795, "learning_rate": 5.3150684931506844e-08, "log_odds_chosen": 1.8118603229522705, "log_odds_ratio": -0.31665748357772827, "logits/chosen": 1.3593565225601196, "logits/rejected": 1.3710731267929077, "logps/chosen": -2.024085283279419, "logps/rejected": -3.693582773208618, "loss": 0.5538, "nll_loss": 0.5221549868583679, "rewards/accuracies": 0.875, "rewards/chosen": -0.20240852236747742, "rewards/margins": 0.16694974899291992, "rewards/rejected": -0.3693583011627197, "step": 6912 }, { "epoch": 18.926762491444215, "grad_norm": 3.6745951175689697, "learning_rate": 5.301369863013699e-08, "log_odds_chosen": 3.8393235206604004, "log_odds_ratio": -0.07276684790849686, "logits/chosen": 1.4669193029403687, "logits/rejected": 1.5126588344573975, "logps/chosen": -1.7378934621810913, "logps/rejected": -5.342584133148193, "loss": 0.4676, "nll_loss": 0.46033409237861633, "rewards/accuracies": 1.0, "rewards/chosen": -0.1737893521785736, "rewards/margins": 0.36046910285949707, "rewards/rejected": -0.5342584848403931, "step": 6913 }, { "epoch": 18.92950034223135, "grad_norm": 8.242761611938477, "learning_rate": 5.287671232876712e-08, "log_odds_chosen": 1.3756482601165771, "log_odds_ratio": -0.6614194512367249, "logits/chosen": 0.9806087613105774, "logits/rejected": 0.9535529017448425, "logps/chosen": -2.3488950729370117, "logps/rejected": -3.6738643646240234, "loss": 0.7258, "nll_loss": 0.6596318483352661, "rewards/accuracies": 0.75, "rewards/chosen": -0.23488953709602356, "rewards/margins": 0.1324969232082367, "rewards/rejected": -0.36738646030426025, "step": 6914 }, { "epoch": 18.93223819301848, "grad_norm": 3.4929118156433105, "learning_rate": 5.2739726027397254e-08, "log_odds_chosen": 3.1554274559020996, "log_odds_ratio": -0.1702401340007782, "logits/chosen": 1.107621192932129, "logits/rejected": 1.0142678022384644, "logps/chosen": -1.7532832622528076, "logps/rejected": -4.6540303230285645, "loss": 0.5556, "nll_loss": 0.5385474562644958, "rewards/accuracies": 1.0, "rewards/chosen": -0.1753283143043518, "rewards/margins": 0.2900747060775757, "rewards/rejected": -0.4654030501842499, "step": 6915 }, { "epoch": 18.934976043805612, "grad_norm": 3.9604110717773438, "learning_rate": 5.26027397260274e-08, "log_odds_chosen": 1.796126365661621, "log_odds_ratio": -0.26164186000823975, "logits/chosen": 1.1693003177642822, "logits/rejected": 1.1527024507522583, "logps/chosen": -1.8311538696289062, "logps/rejected": -3.295376777648926, "loss": 0.4658, "nll_loss": 0.4396531581878662, "rewards/accuracies": 0.875, "rewards/chosen": -0.1831153929233551, "rewards/margins": 0.14642232656478882, "rewards/rejected": -0.3295377194881439, "step": 6916 }, { "epoch": 18.937713894592743, "grad_norm": 3.7705488204956055, "learning_rate": 5.246575342465753e-08, "log_odds_chosen": 4.715494632720947, "log_odds_ratio": -0.08120925724506378, "logits/chosen": 1.0443487167358398, "logits/rejected": 1.1409709453582764, "logps/chosen": -2.085812568664551, "logps/rejected": -6.634122848510742, "loss": 0.7514, "nll_loss": 0.7432342767715454, "rewards/accuracies": 1.0, "rewards/chosen": -0.20858126878738403, "rewards/margins": 0.454831063747406, "rewards/rejected": -0.66341233253479, "step": 6917 }, { "epoch": 18.940451745379878, "grad_norm": 3.8462908267974854, "learning_rate": 5.232876712328767e-08, "log_odds_chosen": 2.1224660873413086, "log_odds_ratio": -0.2067606896162033, "logits/chosen": 1.3377909660339355, "logits/rejected": 1.371934413909912, "logps/chosen": -2.2518157958984375, "logps/rejected": -4.272250175476074, "loss": 0.6359, "nll_loss": 0.6152437925338745, "rewards/accuracies": 1.0, "rewards/chosen": -0.22518157958984375, "rewards/margins": 0.20204345881938934, "rewards/rejected": -0.4272250235080719, "step": 6918 }, { "epoch": 18.94318959616701, "grad_norm": 3.6321370601654053, "learning_rate": 5.21917808219178e-08, "log_odds_chosen": 2.8164992332458496, "log_odds_ratio": -0.21427768468856812, "logits/chosen": 1.3500375747680664, "logits/rejected": 1.2640843391418457, "logps/chosen": -1.62082839012146, "logps/rejected": -4.264485836029053, "loss": 0.494, "nll_loss": 0.4725993871688843, "rewards/accuracies": 0.875, "rewards/chosen": -0.16208283603191376, "rewards/margins": 0.2643657326698303, "rewards/rejected": -0.4264485836029053, "step": 6919 }, { "epoch": 18.94592744695414, "grad_norm": 3.5023481845855713, "learning_rate": 5.205479452054794e-08, "log_odds_chosen": 2.8950061798095703, "log_odds_ratio": -0.13746985793113708, "logits/chosen": 1.0893404483795166, "logits/rejected": 1.086028814315796, "logps/chosen": -1.2162119150161743, "logps/rejected": -3.68666410446167, "loss": 0.4413, "nll_loss": 0.4275600016117096, "rewards/accuracies": 1.0, "rewards/chosen": -0.12162119150161743, "rewards/margins": 0.24704527854919434, "rewards/rejected": -0.36866647005081177, "step": 6920 }, { "epoch": 18.94866529774127, "grad_norm": 6.44583797454834, "learning_rate": 5.191780821917808e-08, "log_odds_chosen": 3.100681781768799, "log_odds_ratio": -0.3668420910835266, "logits/chosen": 1.2740905284881592, "logits/rejected": 1.271897792816162, "logps/chosen": -2.0816471576690674, "logps/rejected": -4.982428073883057, "loss": 0.5875, "nll_loss": 0.5508300065994263, "rewards/accuracies": 0.875, "rewards/chosen": -0.20816472172737122, "rewards/margins": 0.2900781035423279, "rewards/rejected": -0.4982427954673767, "step": 6921 }, { "epoch": 18.951403148528406, "grad_norm": 11.945405960083008, "learning_rate": 5.178082191780821e-08, "log_odds_chosen": 2.7542037963867188, "log_odds_ratio": -0.26281699538230896, "logits/chosen": 1.2820720672607422, "logits/rejected": 1.2985285520553589, "logps/chosen": -2.210643768310547, "logps/rejected": -4.741297721862793, "loss": 0.5384, "nll_loss": 0.512109100818634, "rewards/accuracies": 0.875, "rewards/chosen": -0.22106435894966125, "rewards/margins": 0.2530653774738312, "rewards/rejected": -0.47412973642349243, "step": 6922 }, { "epoch": 18.954140999315538, "grad_norm": 9.32448959350586, "learning_rate": 5.164383561643836e-08, "log_odds_chosen": 2.2904932498931885, "log_odds_ratio": -0.27419939637184143, "logits/chosen": 1.1800031661987305, "logits/rejected": 1.2184914350509644, "logps/chosen": -2.5836262702941895, "logps/rejected": -4.692328453063965, "loss": 0.5681, "nll_loss": 0.5406475067138672, "rewards/accuracies": 1.0, "rewards/chosen": -0.25836265087127686, "rewards/margins": 0.21087023615837097, "rewards/rejected": -0.46923285722732544, "step": 6923 }, { "epoch": 18.95687885010267, "grad_norm": 6.877859115600586, "learning_rate": 5.150684931506849e-08, "log_odds_chosen": 0.9337182641029358, "log_odds_ratio": -0.47614946961402893, "logits/chosen": 0.931813657283783, "logits/rejected": 0.8771450519561768, "logps/chosen": -2.1380817890167236, "logps/rejected": -2.955274820327759, "loss": 0.642, "nll_loss": 0.5943388938903809, "rewards/accuracies": 0.875, "rewards/chosen": -0.21380816400051117, "rewards/margins": 0.0817193016409874, "rewards/rejected": -0.29552745819091797, "step": 6924 }, { "epoch": 18.9596167008898, "grad_norm": 3.7044765949249268, "learning_rate": 5.136986301369862e-08, "log_odds_chosen": 2.432272434234619, "log_odds_ratio": -0.20059776306152344, "logits/chosen": 0.9342076182365417, "logits/rejected": 0.9094085693359375, "logps/chosen": -2.1100401878356934, "logps/rejected": -4.402952194213867, "loss": 0.611, "nll_loss": 0.5909084677696228, "rewards/accuracies": 1.0, "rewards/chosen": -0.21100400388240814, "rewards/margins": 0.22929121553897858, "rewards/rejected": -0.4402952194213867, "step": 6925 }, { "epoch": 18.962354551676935, "grad_norm": 3.3472812175750732, "learning_rate": 5.123287671232877e-08, "log_odds_chosen": 3.190034866333008, "log_odds_ratio": -0.22891898453235626, "logits/chosen": 0.9862052202224731, "logits/rejected": 0.9438517689704895, "logps/chosen": -1.369480848312378, "logps/rejected": -4.357855796813965, "loss": 0.6258, "nll_loss": 0.602940559387207, "rewards/accuracies": 1.0, "rewards/chosen": -0.13694807887077332, "rewards/margins": 0.29883748292922974, "rewards/rejected": -0.43578556180000305, "step": 6926 }, { "epoch": 18.965092402464066, "grad_norm": 3.558441638946533, "learning_rate": 5.10958904109589e-08, "log_odds_chosen": 2.1431140899658203, "log_odds_ratio": -0.23618948459625244, "logits/chosen": 0.7881364822387695, "logits/rejected": 0.7286041975021362, "logps/chosen": -1.3516333103179932, "logps/rejected": -3.2271296977996826, "loss": 0.4858, "nll_loss": 0.462226927280426, "rewards/accuracies": 1.0, "rewards/chosen": -0.1351633369922638, "rewards/margins": 0.1875496506690979, "rewards/rejected": -0.3227129876613617, "step": 6927 }, { "epoch": 18.967830253251197, "grad_norm": 3.216256856918335, "learning_rate": 5.095890410958904e-08, "log_odds_chosen": 3.3823070526123047, "log_odds_ratio": -0.15159808099269867, "logits/chosen": 1.066672444343567, "logits/rejected": 0.948338508605957, "logps/chosen": -1.442829966545105, "logps/rejected": -4.437171936035156, "loss": 0.5118, "nll_loss": 0.49659550189971924, "rewards/accuracies": 1.0, "rewards/chosen": -0.1442829966545105, "rewards/margins": 0.29943418502807617, "rewards/rejected": -0.44371718168258667, "step": 6928 }, { "epoch": 18.97056810403833, "grad_norm": 7.566105365753174, "learning_rate": 5.082191780821918e-08, "log_odds_chosen": 1.8885151147842407, "log_odds_ratio": -0.4092860817909241, "logits/chosen": 1.1652069091796875, "logits/rejected": 1.0928869247436523, "logps/chosen": -2.089366912841797, "logps/rejected": -3.7621655464172363, "loss": 0.5924, "nll_loss": 0.5515174865722656, "rewards/accuracies": 0.75, "rewards/chosen": -0.2089366912841797, "rewards/margins": 0.16727986931800842, "rewards/rejected": -0.3762165904045105, "step": 6929 }, { "epoch": 18.973305954825463, "grad_norm": 3.1027488708496094, "learning_rate": 5.068493150684931e-08, "log_odds_chosen": 5.952305793762207, "log_odds_ratio": -0.07239040732383728, "logits/chosen": 1.4051074981689453, "logits/rejected": 1.4410923719406128, "logps/chosen": -2.065518856048584, "logps/rejected": -7.8145341873168945, "loss": 0.5187, "nll_loss": 0.5114525556564331, "rewards/accuracies": 1.0, "rewards/chosen": -0.20655187964439392, "rewards/margins": 0.5749014616012573, "rewards/rejected": -0.7814533710479736, "step": 6930 }, { "epoch": 18.976043805612594, "grad_norm": 3.5476789474487305, "learning_rate": 5.054794520547945e-08, "log_odds_chosen": 4.135156631469727, "log_odds_ratio": -0.15314680337905884, "logits/chosen": 0.9915033578872681, "logits/rejected": 0.9663979411125183, "logps/chosen": -1.5064172744750977, "logps/rejected": -5.362623691558838, "loss": 0.534, "nll_loss": 0.5187281966209412, "rewards/accuracies": 0.875, "rewards/chosen": -0.15064173936843872, "rewards/margins": 0.385620653629303, "rewards/rejected": -0.5362623929977417, "step": 6931 }, { "epoch": 18.978781656399725, "grad_norm": 3.16206955909729, "learning_rate": 5.041095890410959e-08, "log_odds_chosen": 3.3743205070495605, "log_odds_ratio": -0.17691195011138916, "logits/chosen": 1.3475573062896729, "logits/rejected": 1.315093994140625, "logps/chosen": -0.9880548119544983, "logps/rejected": -3.950421094894409, "loss": 0.5068, "nll_loss": 0.48914214968681335, "rewards/accuracies": 1.0, "rewards/chosen": -0.0988054871559143, "rewards/margins": 0.29623666405677795, "rewards/rejected": -0.39504215121269226, "step": 6932 }, { "epoch": 18.98151950718686, "grad_norm": 3.838343620300293, "learning_rate": 5.0273972602739727e-08, "log_odds_chosen": 2.0547759532928467, "log_odds_ratio": -0.19595639407634735, "logits/chosen": 0.9794654846191406, "logits/rejected": 0.879957914352417, "logps/chosen": -1.6328109502792358, "logps/rejected": -3.434647560119629, "loss": 0.4482, "nll_loss": 0.4286104738712311, "rewards/accuracies": 1.0, "rewards/chosen": -0.16328109800815582, "rewards/margins": 0.18018367886543274, "rewards/rejected": -0.34346476197242737, "step": 6933 }, { "epoch": 18.98425735797399, "grad_norm": 9.308835983276367, "learning_rate": 5.013698630136986e-08, "log_odds_chosen": 1.8146133422851562, "log_odds_ratio": -0.511688232421875, "logits/chosen": 1.1344537734985352, "logits/rejected": 1.1237316131591797, "logps/chosen": -2.6001265048980713, "logps/rejected": -4.218656539916992, "loss": 0.6838, "nll_loss": 0.6326239109039307, "rewards/accuracies": 0.75, "rewards/chosen": -0.2600126564502716, "rewards/margins": 0.16185303032398224, "rewards/rejected": -0.42186570167541504, "step": 6934 }, { "epoch": 18.986995208761122, "grad_norm": 5.937194347381592, "learning_rate": 5e-08, "log_odds_chosen": 2.838320255279541, "log_odds_ratio": -0.22148410975933075, "logits/chosen": 0.9677602052688599, "logits/rejected": 0.9368894100189209, "logps/chosen": -2.2477431297302246, "logps/rejected": -4.906353950500488, "loss": 0.516, "nll_loss": 0.4938965141773224, "rewards/accuracies": 1.0, "rewards/chosen": -0.2247743308544159, "rewards/margins": 0.26586103439331055, "rewards/rejected": -0.49063539505004883, "step": 6935 }, { "epoch": 18.989733059548254, "grad_norm": 3.6443052291870117, "learning_rate": 4.9863013698630137e-08, "log_odds_chosen": 2.5549283027648926, "log_odds_ratio": -0.21756866574287415, "logits/chosen": 1.336423635482788, "logits/rejected": 1.1619502305984497, "logps/chosen": -1.2315897941589355, "logps/rejected": -3.5013904571533203, "loss": 0.3877, "nll_loss": 0.3659200370311737, "rewards/accuracies": 0.875, "rewards/chosen": -0.12315897643566132, "rewards/margins": 0.22698010504245758, "rewards/rejected": -0.3501390814781189, "step": 6936 }, { "epoch": 18.99247091033539, "grad_norm": 3.801638126373291, "learning_rate": 4.972602739726027e-08, "log_odds_chosen": 4.677430152893066, "log_odds_ratio": -0.15061180293560028, "logits/chosen": 0.9470070600509644, "logits/rejected": 0.9092179536819458, "logps/chosen": -1.8343288898468018, "logps/rejected": -6.344390869140625, "loss": 0.4899, "nll_loss": 0.4748746454715729, "rewards/accuracies": 1.0, "rewards/chosen": -0.18343287706375122, "rewards/margins": 0.4510062038898468, "rewards/rejected": -0.6344391107559204, "step": 6937 }, { "epoch": 18.99520876112252, "grad_norm": 3.944333076477051, "learning_rate": 4.9589041095890414e-08, "log_odds_chosen": 1.4736400842666626, "log_odds_ratio": -0.3327476978302002, "logits/chosen": 1.330010175704956, "logits/rejected": 1.2169657945632935, "logps/chosen": -1.6103627681732178, "logps/rejected": -2.909269094467163, "loss": 0.4406, "nll_loss": 0.4073682129383087, "rewards/accuracies": 0.875, "rewards/chosen": -0.16103628277778625, "rewards/margins": 0.12989062070846558, "rewards/rejected": -0.29092690348625183, "step": 6938 }, { "epoch": 18.99794661190965, "grad_norm": 3.4353878498077393, "learning_rate": 4.9452054794520546e-08, "log_odds_chosen": 6.644805908203125, "log_odds_ratio": -0.05773588642477989, "logits/chosen": 1.1828694343566895, "logits/rejected": 1.2009270191192627, "logps/chosen": -1.721919298171997, "logps/rejected": -8.156683921813965, "loss": 0.517, "nll_loss": 0.5112716555595398, "rewards/accuracies": 1.0, "rewards/chosen": -0.17219193279743195, "rewards/margins": 0.6434764862060547, "rewards/rejected": -0.8156684041023254, "step": 6939 }, { "epoch": 19.000684462696782, "grad_norm": 4.0219502449035645, "learning_rate": 4.931506849315068e-08, "log_odds_chosen": 2.9625425338745117, "log_odds_ratio": -0.10903788357973099, "logits/chosen": 1.4968371391296387, "logits/rejected": 1.5473922491073608, "logps/chosen": -2.345280647277832, "logps/rejected": -5.146203517913818, "loss": 0.5004, "nll_loss": 0.48949918150901794, "rewards/accuracies": 1.0, "rewards/chosen": -0.2345280945301056, "rewards/margins": 0.2800922691822052, "rewards/rejected": -0.5146203637123108, "step": 6940 }, { "epoch": 19.003422313483917, "grad_norm": 4.871822834014893, "learning_rate": 4.9178082191780824e-08, "log_odds_chosen": 2.654945135116577, "log_odds_ratio": -0.18295659124851227, "logits/chosen": 1.051493525505066, "logits/rejected": 1.0445382595062256, "logps/chosen": -1.6811202764511108, "logps/rejected": -4.009326934814453, "loss": 0.4808, "nll_loss": 0.4624778628349304, "rewards/accuracies": 1.0, "rewards/chosen": -0.16811202466487885, "rewards/margins": 0.23282068967819214, "rewards/rejected": -0.4009327292442322, "step": 6941 }, { "epoch": 19.006160164271048, "grad_norm": 5.647770404815674, "learning_rate": 4.9041095890410956e-08, "log_odds_chosen": 3.1289637088775635, "log_odds_ratio": -0.32420486211776733, "logits/chosen": 1.3397890329360962, "logits/rejected": 1.3158469200134277, "logps/chosen": -2.718017578125, "logps/rejected": -5.728307723999023, "loss": 0.5474, "nll_loss": 0.5149449110031128, "rewards/accuracies": 0.875, "rewards/chosen": -0.27180176973342896, "rewards/margins": 0.3010289967060089, "rewards/rejected": -0.5728307962417603, "step": 6942 }, { "epoch": 19.00889801505818, "grad_norm": 3.367875576019287, "learning_rate": 4.8904109589041095e-08, "log_odds_chosen": 4.427404403686523, "log_odds_ratio": -0.08883358538150787, "logits/chosen": 0.8291986584663391, "logits/rejected": 0.7859060764312744, "logps/chosen": -2.1350035667419434, "logps/rejected": -6.306517124176025, "loss": 0.4822, "nll_loss": 0.4733109474182129, "rewards/accuracies": 1.0, "rewards/chosen": -0.21350036561489105, "rewards/margins": 0.4171513319015503, "rewards/rejected": -0.6306517124176025, "step": 6943 }, { "epoch": 19.01163586584531, "grad_norm": 5.551638126373291, "learning_rate": 4.876712328767123e-08, "log_odds_chosen": 2.047039031982422, "log_odds_ratio": -0.3028010129928589, "logits/chosen": 0.7000800371170044, "logits/rejected": 0.6292948722839355, "logps/chosen": -1.708739995956421, "logps/rejected": -3.5203678607940674, "loss": 0.5151, "nll_loss": 0.48477473855018616, "rewards/accuracies": 0.875, "rewards/chosen": -0.17087401449680328, "rewards/margins": 0.1811627894639969, "rewards/rejected": -0.3520367741584778, "step": 6944 }, { "epoch": 19.014373716632445, "grad_norm": 3.470625162124634, "learning_rate": 4.8630136986301366e-08, "log_odds_chosen": 1.9048535823822021, "log_odds_ratio": -0.18933027982711792, "logits/chosen": 0.9692353010177612, "logits/rejected": 0.9401231408119202, "logps/chosen": -1.6838033199310303, "logps/rejected": -3.401496410369873, "loss": 0.5613, "nll_loss": 0.5423808097839355, "rewards/accuracies": 1.0, "rewards/chosen": -0.16838032007217407, "rewards/margins": 0.17176933586597443, "rewards/rejected": -0.3401496708393097, "step": 6945 }, { "epoch": 19.017111567419576, "grad_norm": 10.560037612915039, "learning_rate": 4.8493150684931505e-08, "log_odds_chosen": 1.022763729095459, "log_odds_ratio": -0.779681921005249, "logits/chosen": 1.2321183681488037, "logits/rejected": 1.2118340730667114, "logps/chosen": -2.6455094814300537, "logps/rejected": -3.562131881713867, "loss": 0.6486, "nll_loss": 0.5706095695495605, "rewards/accuracies": 0.625, "rewards/chosen": -0.26455095410346985, "rewards/margins": 0.09166224300861359, "rewards/rejected": -0.35621318221092224, "step": 6946 }, { "epoch": 19.019849418206707, "grad_norm": 4.3205790519714355, "learning_rate": 4.835616438356164e-08, "log_odds_chosen": 2.2409920692443848, "log_odds_ratio": -0.17713576555252075, "logits/chosen": 0.8651474714279175, "logits/rejected": 0.7848494052886963, "logps/chosen": -1.969440221786499, "logps/rejected": -4.03750467300415, "loss": 0.4991, "nll_loss": 0.48138052225112915, "rewards/accuracies": 1.0, "rewards/chosen": -0.19694402813911438, "rewards/margins": 0.20680645108222961, "rewards/rejected": -0.403750479221344, "step": 6947 }, { "epoch": 19.02258726899384, "grad_norm": 3.8370325565338135, "learning_rate": 4.821917808219178e-08, "log_odds_chosen": 3.2542574405670166, "log_odds_ratio": -0.09643902629613876, "logits/chosen": 1.1579394340515137, "logits/rejected": 1.1392275094985962, "logps/chosen": -1.8066633939743042, "logps/rejected": -4.819005489349365, "loss": 0.463, "nll_loss": 0.45332178473472595, "rewards/accuracies": 1.0, "rewards/chosen": -0.18066634237766266, "rewards/margins": 0.3012341856956482, "rewards/rejected": -0.48190057277679443, "step": 6948 }, { "epoch": 19.025325119780973, "grad_norm": 4.007104873657227, "learning_rate": 4.8082191780821915e-08, "log_odds_chosen": 2.3673043251037598, "log_odds_ratio": -0.2529594898223877, "logits/chosen": 1.1770166158676147, "logits/rejected": 1.0662508010864258, "logps/chosen": -1.567097544670105, "logps/rejected": -3.7328670024871826, "loss": 0.464, "nll_loss": 0.438702791929245, "rewards/accuracies": 0.875, "rewards/chosen": -0.15670976042747498, "rewards/margins": 0.21657694876194, "rewards/rejected": -0.3732866942882538, "step": 6949 }, { "epoch": 19.028062970568104, "grad_norm": 8.465174674987793, "learning_rate": 4.794520547945205e-08, "log_odds_chosen": 1.6091337203979492, "log_odds_ratio": -0.3942050635814667, "logits/chosen": 1.385271668434143, "logits/rejected": 1.3544254302978516, "logps/chosen": -1.8985321521759033, "logps/rejected": -3.3435513973236084, "loss": 0.5059, "nll_loss": 0.4664941728115082, "rewards/accuracies": 0.875, "rewards/chosen": -0.1898532211780548, "rewards/margins": 0.1445019394159317, "rewards/rejected": -0.3343551754951477, "step": 6950 }, { "epoch": 19.030800821355236, "grad_norm": 2.9210665225982666, "learning_rate": 4.780821917808219e-08, "log_odds_chosen": 2.6667213439941406, "log_odds_ratio": -0.13870468735694885, "logits/chosen": 1.2040801048278809, "logits/rejected": 1.1794471740722656, "logps/chosen": -1.2212305068969727, "logps/rejected": -3.5772509574890137, "loss": 0.4194, "nll_loss": 0.4054986536502838, "rewards/accuracies": 1.0, "rewards/chosen": -0.12212305516004562, "rewards/margins": 0.23560205101966858, "rewards/rejected": -0.3577250838279724, "step": 6951 }, { "epoch": 19.033538672142367, "grad_norm": 4.287741661071777, "learning_rate": 4.7671232876712325e-08, "log_odds_chosen": 1.2391606569290161, "log_odds_ratio": -0.28487858176231384, "logits/chosen": 0.9867472648620605, "logits/rejected": 0.8439476490020752, "logps/chosen": -1.5622309446334839, "logps/rejected": -2.62009596824646, "loss": 0.4679, "nll_loss": 0.4394046664237976, "rewards/accuracies": 1.0, "rewards/chosen": -0.1562230885028839, "rewards/margins": 0.1057865172624588, "rewards/rejected": -0.2620095908641815, "step": 6952 }, { "epoch": 19.0362765229295, "grad_norm": 3.9410159587860107, "learning_rate": 4.7534246575342464e-08, "log_odds_chosen": 5.813055992126465, "log_odds_ratio": -0.06863544136285782, "logits/chosen": 0.9866288900375366, "logits/rejected": 1.0075366497039795, "logps/chosen": -1.8544411659240723, "logps/rejected": -7.427407741546631, "loss": 0.4987, "nll_loss": 0.4918642044067383, "rewards/accuracies": 1.0, "rewards/chosen": -0.18544414639472961, "rewards/margins": 0.557296633720398, "rewards/rejected": -0.74274080991745, "step": 6953 }, { "epoch": 19.039014373716633, "grad_norm": 4.476354598999023, "learning_rate": 4.73972602739726e-08, "log_odds_chosen": 1.8556559085845947, "log_odds_ratio": -0.2912091314792633, "logits/chosen": 0.9735409021377563, "logits/rejected": 0.9379724264144897, "logps/chosen": -1.519169807434082, "logps/rejected": -3.2121176719665527, "loss": 0.4962, "nll_loss": 0.4670502543449402, "rewards/accuracies": 0.875, "rewards/chosen": -0.1519169807434082, "rewards/margins": 0.1692948341369629, "rewards/rejected": -0.3212118148803711, "step": 6954 }, { "epoch": 19.041752224503764, "grad_norm": 3.4911348819732666, "learning_rate": 4.7260273972602735e-08, "log_odds_chosen": 1.6398180723190308, "log_odds_ratio": -0.3205646872520447, "logits/chosen": 1.3653087615966797, "logits/rejected": 1.3120286464691162, "logps/chosen": -1.4598116874694824, "logps/rejected": -2.9352784156799316, "loss": 0.4492, "nll_loss": 0.41710567474365234, "rewards/accuracies": 0.75, "rewards/chosen": -0.14598119258880615, "rewards/margins": 0.1475466787815094, "rewards/rejected": -0.29352784156799316, "step": 6955 }, { "epoch": 19.044490075290895, "grad_norm": 4.1346235275268555, "learning_rate": 4.7123287671232874e-08, "log_odds_chosen": 4.0154032707214355, "log_odds_ratio": -0.27414408326148987, "logits/chosen": 0.9002830982208252, "logits/rejected": 0.9219180345535278, "logps/chosen": -2.0602619647979736, "logps/rejected": -5.921348571777344, "loss": 0.6189, "nll_loss": 0.5914533734321594, "rewards/accuracies": 0.875, "rewards/chosen": -0.20602621138095856, "rewards/margins": 0.3861086368560791, "rewards/rejected": -0.5921348333358765, "step": 6956 }, { "epoch": 19.04722792607803, "grad_norm": 3.6116113662719727, "learning_rate": 4.698630136986301e-08, "log_odds_chosen": 2.135216236114502, "log_odds_ratio": -0.23151496052742004, "logits/chosen": 1.0407001972198486, "logits/rejected": 1.0392463207244873, "logps/chosen": -1.7104637622833252, "logps/rejected": -3.646920919418335, "loss": 0.4942, "nll_loss": 0.4710671007633209, "rewards/accuracies": 1.0, "rewards/chosen": -0.17104637622833252, "rewards/margins": 0.19364571571350098, "rewards/rejected": -0.3646920919418335, "step": 6957 }, { "epoch": 19.04996577686516, "grad_norm": 6.676967620849609, "learning_rate": 4.684931506849315e-08, "log_odds_chosen": 1.6153888702392578, "log_odds_ratio": -0.404581218957901, "logits/chosen": 1.0478564500808716, "logits/rejected": 1.0951358079910278, "logps/chosen": -1.7718725204467773, "logps/rejected": -3.108595848083496, "loss": 0.5027, "nll_loss": 0.46226754784584045, "rewards/accuracies": 0.75, "rewards/chosen": -0.1771872639656067, "rewards/margins": 0.13367235660552979, "rewards/rejected": -0.3108596205711365, "step": 6958 }, { "epoch": 19.052703627652292, "grad_norm": 3.3579461574554443, "learning_rate": 4.6712328767123284e-08, "log_odds_chosen": 2.6965999603271484, "log_odds_ratio": -0.20265018939971924, "logits/chosen": 0.9665746092796326, "logits/rejected": 0.9587618112564087, "logps/chosen": -1.5574626922607422, "logps/rejected": -3.886732816696167, "loss": 0.4318, "nll_loss": 0.4115504026412964, "rewards/accuracies": 0.875, "rewards/chosen": -0.15574628114700317, "rewards/margins": 0.23292702436447144, "rewards/rejected": -0.3886732757091522, "step": 6959 }, { "epoch": 19.055441478439427, "grad_norm": 3.784337043762207, "learning_rate": 4.657534246575342e-08, "log_odds_chosen": 2.3925154209136963, "log_odds_ratio": -0.18657368421554565, "logits/chosen": 0.7311927080154419, "logits/rejected": 0.6769099235534668, "logps/chosen": -1.4427213668823242, "logps/rejected": -3.5857088565826416, "loss": 0.5258, "nll_loss": 0.5071847438812256, "rewards/accuracies": 1.0, "rewards/chosen": -0.14427214860916138, "rewards/margins": 0.21429875493049622, "rewards/rejected": -0.3585709035396576, "step": 6960 }, { "epoch": 19.058179329226558, "grad_norm": 4.824021816253662, "learning_rate": 4.643835616438356e-08, "log_odds_chosen": 4.981898307800293, "log_odds_ratio": -0.29575496912002563, "logits/chosen": 1.0702518224716187, "logits/rejected": 1.0816713571548462, "logps/chosen": -2.3850173950195312, "logps/rejected": -7.298358917236328, "loss": 0.6917, "nll_loss": 0.6621143817901611, "rewards/accuracies": 0.625, "rewards/chosen": -0.23850175738334656, "rewards/margins": 0.49133414030075073, "rewards/rejected": -0.7298359274864197, "step": 6961 }, { "epoch": 19.06091718001369, "grad_norm": 4.106293201446533, "learning_rate": 4.6301369863013694e-08, "log_odds_chosen": 3.3242387771606445, "log_odds_ratio": -0.32242539525032043, "logits/chosen": 1.0234248638153076, "logits/rejected": 0.9752802848815918, "logps/chosen": -1.6853185892105103, "logps/rejected": -4.871175289154053, "loss": 0.4907, "nll_loss": 0.458459734916687, "rewards/accuracies": 0.875, "rewards/chosen": -0.1685318499803543, "rewards/margins": 0.31858569383621216, "rewards/rejected": -0.4871175289154053, "step": 6962 }, { "epoch": 19.06365503080082, "grad_norm": 3.458475351333618, "learning_rate": 4.616438356164384e-08, "log_odds_chosen": 1.6355936527252197, "log_odds_ratio": -0.2959698736667633, "logits/chosen": 0.991868257522583, "logits/rejected": 0.908737301826477, "logps/chosen": -1.8542289733886719, "logps/rejected": -3.3265628814697266, "loss": 0.5488, "nll_loss": 0.5191654562950134, "rewards/accuracies": 0.875, "rewards/chosen": -0.1854228973388672, "rewards/margins": 0.14723336696624756, "rewards/rejected": -0.33265626430511475, "step": 6963 }, { "epoch": 19.066392881587955, "grad_norm": 8.180744171142578, "learning_rate": 4.602739726027397e-08, "log_odds_chosen": 2.1291117668151855, "log_odds_ratio": -0.43117019534111023, "logits/chosen": 1.0055197477340698, "logits/rejected": 0.9605046510696411, "logps/chosen": -2.6186039447784424, "logps/rejected": -4.687690734863281, "loss": 0.6788, "nll_loss": 0.6356772184371948, "rewards/accuracies": 0.875, "rewards/chosen": -0.2618604004383087, "rewards/margins": 0.20690865814685822, "rewards/rejected": -0.4687690734863281, "step": 6964 }, { "epoch": 19.069130732375086, "grad_norm": 3.2390570640563965, "learning_rate": 4.5890410958904103e-08, "log_odds_chosen": 3.0676589012145996, "log_odds_ratio": -0.2037106603384018, "logits/chosen": 1.1406748294830322, "logits/rejected": 1.1213669776916504, "logps/chosen": -1.503180742263794, "logps/rejected": -4.390273571014404, "loss": 0.4714, "nll_loss": 0.45102161169052124, "rewards/accuracies": 1.0, "rewards/chosen": -0.15031808614730835, "rewards/margins": 0.2887093126773834, "rewards/rejected": -0.4390273690223694, "step": 6965 }, { "epoch": 19.071868583162217, "grad_norm": 5.431195259094238, "learning_rate": 4.575342465753424e-08, "log_odds_chosen": 2.0568625926971436, "log_odds_ratio": -0.392085999250412, "logits/chosen": 1.1439127922058105, "logits/rejected": 1.1410115957260132, "logps/chosen": -2.0040769577026367, "logps/rejected": -3.7888987064361572, "loss": 0.4709, "nll_loss": 0.43165600299835205, "rewards/accuracies": 0.875, "rewards/chosen": -0.20040768384933472, "rewards/margins": 0.17848218977451324, "rewards/rejected": -0.37888988852500916, "step": 6966 }, { "epoch": 19.07460643394935, "grad_norm": 4.791996955871582, "learning_rate": 4.561643835616438e-08, "log_odds_chosen": 4.573118686676025, "log_odds_ratio": -0.24307112395763397, "logits/chosen": 1.21157968044281, "logits/rejected": 1.2418608665466309, "logps/chosen": -2.60396671295166, "logps/rejected": -7.07607889175415, "loss": 0.6207, "nll_loss": 0.5963537096977234, "rewards/accuracies": 0.875, "rewards/chosen": -0.26039665937423706, "rewards/margins": 0.44721120595932007, "rewards/rejected": -0.7076078653335571, "step": 6967 }, { "epoch": 19.077344284736483, "grad_norm": 4.173375606536865, "learning_rate": 4.547945205479452e-08, "log_odds_chosen": 3.1231043338775635, "log_odds_ratio": -0.15310513973236084, "logits/chosen": 1.1725785732269287, "logits/rejected": 1.2325763702392578, "logps/chosen": -2.5726399421691895, "logps/rejected": -5.577287197113037, "loss": 0.6173, "nll_loss": 0.6019420623779297, "rewards/accuracies": 1.0, "rewards/chosen": -0.25726401805877686, "rewards/margins": 0.3004647195339203, "rewards/rejected": -0.5577287673950195, "step": 6968 }, { "epoch": 19.080082135523615, "grad_norm": 4.250936508178711, "learning_rate": 4.534246575342465e-08, "log_odds_chosen": 2.4795241355895996, "log_odds_ratio": -0.32437029480934143, "logits/chosen": 1.0285840034484863, "logits/rejected": 1.1102770566940308, "logps/chosen": -2.562431812286377, "logps/rejected": -5.018381118774414, "loss": 0.5295, "nll_loss": 0.49706393480300903, "rewards/accuracies": 0.875, "rewards/chosen": -0.25624319911003113, "rewards/margins": 0.24559491872787476, "rewards/rejected": -0.5018381476402283, "step": 6969 }, { "epoch": 19.082819986310746, "grad_norm": 4.272684097290039, "learning_rate": 4.520547945205479e-08, "log_odds_chosen": 4.566659927368164, "log_odds_ratio": -0.08949048817157745, "logits/chosen": 1.1004079580307007, "logits/rejected": 1.056373119354248, "logps/chosen": -1.5236937999725342, "logps/rejected": -5.776971340179443, "loss": 0.4901, "nll_loss": 0.48111826181411743, "rewards/accuracies": 1.0, "rewards/chosen": -0.15236936509609222, "rewards/margins": 0.42532777786254883, "rewards/rejected": -0.5776971578598022, "step": 6970 }, { "epoch": 19.085557837097877, "grad_norm": 11.198929786682129, "learning_rate": 4.506849315068493e-08, "log_odds_chosen": 2.7273948192596436, "log_odds_ratio": -0.33372944593429565, "logits/chosen": 1.1636288166046143, "logits/rejected": 1.1463748216629028, "logps/chosen": -2.1138434410095215, "logps/rejected": -4.657435894012451, "loss": 0.5655, "nll_loss": 0.5321762561798096, "rewards/accuracies": 0.875, "rewards/chosen": -0.2113843411207199, "rewards/margins": 0.25435924530029297, "rewards/rejected": -0.4657436013221741, "step": 6971 }, { "epoch": 19.08829568788501, "grad_norm": 3.7024028301239014, "learning_rate": 4.493150684931506e-08, "log_odds_chosen": 4.279750347137451, "log_odds_ratio": -0.1783967912197113, "logits/chosen": 1.1096372604370117, "logits/rejected": 1.0585424900054932, "logps/chosen": -1.73370361328125, "logps/rejected": -5.854730606079102, "loss": 0.5593, "nll_loss": 0.5414115190505981, "rewards/accuracies": 1.0, "rewards/chosen": -0.1733703911304474, "rewards/margins": 0.41210266947746277, "rewards/rejected": -0.5854730010032654, "step": 6972 }, { "epoch": 19.091033538672143, "grad_norm": 6.2042083740234375, "learning_rate": 4.479452054794521e-08, "log_odds_chosen": 3.956102132797241, "log_odds_ratio": -0.3176801800727844, "logits/chosen": 1.0597684383392334, "logits/rejected": 1.0565276145935059, "logps/chosen": -2.0690300464630127, "logps/rejected": -5.878697395324707, "loss": 0.5922, "nll_loss": 0.5604200959205627, "rewards/accuracies": 0.875, "rewards/chosen": -0.20690301060676575, "rewards/margins": 0.3809667229652405, "rewards/rejected": -0.5878697633743286, "step": 6973 }, { "epoch": 19.093771389459274, "grad_norm": 5.088062763214111, "learning_rate": 4.465753424657534e-08, "log_odds_chosen": 1.4477053880691528, "log_odds_ratio": -0.553772509098053, "logits/chosen": 1.0717099905014038, "logits/rejected": 1.0425180196762085, "logps/chosen": -2.728891372680664, "logps/rejected": -3.9402554035186768, "loss": 0.7839, "nll_loss": 0.7285155653953552, "rewards/accuracies": 0.75, "rewards/chosen": -0.2728891372680664, "rewards/margins": 0.12113641202449799, "rewards/rejected": -0.3940255641937256, "step": 6974 }, { "epoch": 19.096509240246405, "grad_norm": 6.146921634674072, "learning_rate": 4.452054794520547e-08, "log_odds_chosen": 1.1488115787506104, "log_odds_ratio": -0.4669229984283447, "logits/chosen": 1.1186625957489014, "logits/rejected": 1.0766271352767944, "logps/chosen": -2.4030003547668457, "logps/rejected": -3.436096429824829, "loss": 0.5395, "nll_loss": 0.492828905582428, "rewards/accuracies": 0.75, "rewards/chosen": -0.2403000295162201, "rewards/margins": 0.10330961644649506, "rewards/rejected": -0.34360966086387634, "step": 6975 }, { "epoch": 19.09924709103354, "grad_norm": 3.817021369934082, "learning_rate": 4.438356164383562e-08, "log_odds_chosen": 4.145960330963135, "log_odds_ratio": -0.06293367594480515, "logits/chosen": 1.0642338991165161, "logits/rejected": 1.084100604057312, "logps/chosen": -1.7524304389953613, "logps/rejected": -5.646669387817383, "loss": 0.6393, "nll_loss": 0.6330211162567139, "rewards/accuracies": 1.0, "rewards/chosen": -0.1752430498600006, "rewards/margins": 0.3894239068031311, "rewards/rejected": -0.5646669864654541, "step": 6976 }, { "epoch": 19.10198494182067, "grad_norm": 4.291441917419434, "learning_rate": 4.424657534246575e-08, "log_odds_chosen": 2.485445976257324, "log_odds_ratio": -0.15068857371807098, "logits/chosen": 1.058577060699463, "logits/rejected": 1.121890664100647, "logps/chosen": -2.4754698276519775, "logps/rejected": -4.820044994354248, "loss": 0.7083, "nll_loss": 0.6932597756385803, "rewards/accuracies": 1.0, "rewards/chosen": -0.24754698574543, "rewards/margins": 0.23445755243301392, "rewards/rejected": -0.4820045232772827, "step": 6977 }, { "epoch": 19.104722792607802, "grad_norm": 3.4714913368225098, "learning_rate": 4.410958904109589e-08, "log_odds_chosen": 3.68398380279541, "log_odds_ratio": -0.2316022366285324, "logits/chosen": 1.4870251417160034, "logits/rejected": 1.5209181308746338, "logps/chosen": -1.9532594680786133, "logps/rejected": -5.5265421867370605, "loss": 0.4764, "nll_loss": 0.45326024293899536, "rewards/accuracies": 0.875, "rewards/chosen": -0.19532594084739685, "rewards/margins": 0.35732829570770264, "rewards/rejected": -0.5526542067527771, "step": 6978 }, { "epoch": 19.107460643394933, "grad_norm": 3.8581793308258057, "learning_rate": 4.397260273972603e-08, "log_odds_chosen": 1.4967801570892334, "log_odds_ratio": -0.3613213002681732, "logits/chosen": 1.1079453229904175, "logits/rejected": 1.0482380390167236, "logps/chosen": -1.8079177141189575, "logps/rejected": -3.2003073692321777, "loss": 0.4771, "nll_loss": 0.44094425439834595, "rewards/accuracies": 1.0, "rewards/chosen": -0.18079178035259247, "rewards/margins": 0.13923895359039307, "rewards/rejected": -0.32003071904182434, "step": 6979 }, { "epoch": 19.110198494182068, "grad_norm": 3.6219210624694824, "learning_rate": 4.383561643835616e-08, "log_odds_chosen": 1.921607255935669, "log_odds_ratio": -0.19705384969711304, "logits/chosen": 0.9688599109649658, "logits/rejected": 0.9699249267578125, "logps/chosen": -1.8264951705932617, "logps/rejected": -3.5964608192443848, "loss": 0.5692, "nll_loss": 0.5495433807373047, "rewards/accuracies": 1.0, "rewards/chosen": -0.18264952301979065, "rewards/margins": 0.17699654400348663, "rewards/rejected": -0.3596460521221161, "step": 6980 }, { "epoch": 19.1129363449692, "grad_norm": 3.667945384979248, "learning_rate": 4.36986301369863e-08, "log_odds_chosen": 1.9268648624420166, "log_odds_ratio": -0.2723783850669861, "logits/chosen": 1.0281556844711304, "logits/rejected": 0.8950942158699036, "logps/chosen": -1.4365758895874023, "logps/rejected": -3.0838582515716553, "loss": 0.5127, "nll_loss": 0.4854414165019989, "rewards/accuracies": 1.0, "rewards/chosen": -0.1436575949192047, "rewards/margins": 0.16472823917865753, "rewards/rejected": -0.30838581919670105, "step": 6981 }, { "epoch": 19.11567419575633, "grad_norm": 3.6737492084503174, "learning_rate": 4.356164383561644e-08, "log_odds_chosen": 3.4089877605438232, "log_odds_ratio": -0.08987089991569519, "logits/chosen": 1.0681440830230713, "logits/rejected": 0.9072461128234863, "logps/chosen": -1.8439215421676636, "logps/rejected": -5.051241397857666, "loss": 0.5411, "nll_loss": 0.5321439504623413, "rewards/accuracies": 1.0, "rewards/chosen": -0.18439215421676636, "rewards/margins": 0.3207320272922516, "rewards/rejected": -0.5051242113113403, "step": 6982 }, { "epoch": 19.11841204654346, "grad_norm": 5.12202787399292, "learning_rate": 4.3424657534246576e-08, "log_odds_chosen": 3.173802375793457, "log_odds_ratio": -0.11084195226430893, "logits/chosen": 1.2090767621994019, "logits/rejected": 1.2915823459625244, "logps/chosen": -2.841538906097412, "logps/rejected": -5.896214962005615, "loss": 0.72, "nll_loss": 0.7089495658874512, "rewards/accuracies": 1.0, "rewards/chosen": -0.28415387868881226, "rewards/margins": 0.3054676055908203, "rewards/rejected": -0.5896214842796326, "step": 6983 }, { "epoch": 19.121149897330596, "grad_norm": 4.4452595710754395, "learning_rate": 4.328767123287671e-08, "log_odds_chosen": 1.4787753820419312, "log_odds_ratio": -0.2750638723373413, "logits/chosen": 1.0773684978485107, "logits/rejected": 0.9376282691955566, "logps/chosen": -1.6179540157318115, "logps/rejected": -2.8784191608428955, "loss": 0.4681, "nll_loss": 0.4406244158744812, "rewards/accuracies": 0.875, "rewards/chosen": -0.16179540753364563, "rewards/margins": 0.12604649364948273, "rewards/rejected": -0.28784191608428955, "step": 6984 }, { "epoch": 19.123887748117728, "grad_norm": 11.265129089355469, "learning_rate": 4.3150684931506854e-08, "log_odds_chosen": 2.6647326946258545, "log_odds_ratio": -0.3464169502258301, "logits/chosen": 1.2583004236221313, "logits/rejected": 1.2894169092178345, "logps/chosen": -3.668092727661133, "logps/rejected": -6.2433953285217285, "loss": 0.7824, "nll_loss": 0.7477107644081116, "rewards/accuracies": 0.875, "rewards/chosen": -0.36680924892425537, "rewards/margins": 0.2575303316116333, "rewards/rejected": -0.6243395805358887, "step": 6985 }, { "epoch": 19.12662559890486, "grad_norm": 3.816390037536621, "learning_rate": 4.3013698630136986e-08, "log_odds_chosen": 2.7455127239227295, "log_odds_ratio": -0.16761070489883423, "logits/chosen": 1.131955862045288, "logits/rejected": 1.1528751850128174, "logps/chosen": -1.7343052625656128, "logps/rejected": -4.256899356842041, "loss": 0.5027, "nll_loss": 0.4858933985233307, "rewards/accuracies": 1.0, "rewards/chosen": -0.17343053221702576, "rewards/margins": 0.25225943326950073, "rewards/rejected": -0.4256899654865265, "step": 6986 }, { "epoch": 19.129363449691994, "grad_norm": 3.4677371978759766, "learning_rate": 4.287671232876712e-08, "log_odds_chosen": 2.39188551902771, "log_odds_ratio": -0.1761055737733841, "logits/chosen": 1.3321082592010498, "logits/rejected": 1.2632173299789429, "logps/chosen": -2.1066830158233643, "logps/rejected": -4.343969345092773, "loss": 0.5393, "nll_loss": 0.521738588809967, "rewards/accuracies": 1.0, "rewards/chosen": -0.21066829562187195, "rewards/margins": 0.22372864186763763, "rewards/rejected": -0.4343969225883484, "step": 6987 }, { "epoch": 19.132101300479125, "grad_norm": 3.704116106033325, "learning_rate": 4.2739726027397264e-08, "log_odds_chosen": 2.7903928756713867, "log_odds_ratio": -0.1463293433189392, "logits/chosen": 1.2680796384811401, "logits/rejected": 1.1469106674194336, "logps/chosen": -1.6018860340118408, "logps/rejected": -4.125967979431152, "loss": 0.5393, "nll_loss": 0.5246574282646179, "rewards/accuracies": 1.0, "rewards/chosen": -0.16018861532211304, "rewards/margins": 0.2524082064628601, "rewards/rejected": -0.41259679198265076, "step": 6988 }, { "epoch": 19.134839151266256, "grad_norm": 3.9693050384521484, "learning_rate": 4.2602739726027396e-08, "log_odds_chosen": 4.634349346160889, "log_odds_ratio": -0.04714222252368927, "logits/chosen": 1.163718819618225, "logits/rejected": 1.2322778701782227, "logps/chosen": -2.141519546508789, "logps/rejected": -6.481224060058594, "loss": 0.5919, "nll_loss": 0.5872341394424438, "rewards/accuracies": 1.0, "rewards/chosen": -0.21415194869041443, "rewards/margins": 0.43397045135498047, "rewards/rejected": -0.6481224298477173, "step": 6989 }, { "epoch": 19.137577002053387, "grad_norm": 4.172204494476318, "learning_rate": 4.246575342465753e-08, "log_odds_chosen": 1.5849896669387817, "log_odds_ratio": -0.42540907859802246, "logits/chosen": 1.1529088020324707, "logits/rejected": 1.1762745380401611, "logps/chosen": -2.0352330207824707, "logps/rejected": -3.5191121101379395, "loss": 0.6126, "nll_loss": 0.5700989961624146, "rewards/accuracies": 0.875, "rewards/chosen": -0.20352330803871155, "rewards/margins": 0.14838792383670807, "rewards/rejected": -0.3519112467765808, "step": 6990 }, { "epoch": 19.140314852840522, "grad_norm": 4.10325288772583, "learning_rate": 4.232876712328767e-08, "log_odds_chosen": 2.3840012550354004, "log_odds_ratio": -0.2657170593738556, "logits/chosen": 1.208678960800171, "logits/rejected": 1.2143030166625977, "logps/chosen": -1.5976347923278809, "logps/rejected": -3.7967891693115234, "loss": 0.5239, "nll_loss": 0.4973188042640686, "rewards/accuracies": 1.0, "rewards/chosen": -0.15976347029209137, "rewards/margins": 0.2199154496192932, "rewards/rejected": -0.379678875207901, "step": 6991 }, { "epoch": 19.143052703627653, "grad_norm": 4.936805725097656, "learning_rate": 4.2191780821917806e-08, "log_odds_chosen": 2.6812405586242676, "log_odds_ratio": -0.19352859258651733, "logits/chosen": 1.191615343093872, "logits/rejected": 1.1531299352645874, "logps/chosen": -2.0410239696502686, "logps/rejected": -4.6020050048828125, "loss": 0.5625, "nll_loss": 0.5431714057922363, "rewards/accuracies": 1.0, "rewards/chosen": -0.20410239696502686, "rewards/margins": 0.25609809160232544, "rewards/rejected": -0.4602004885673523, "step": 6992 }, { "epoch": 19.145790554414784, "grad_norm": 3.3847124576568604, "learning_rate": 4.2054794520547945e-08, "log_odds_chosen": 3.5138156414031982, "log_odds_ratio": -0.15997810661792755, "logits/chosen": 1.0637439489364624, "logits/rejected": 1.029587745666504, "logps/chosen": -1.2095224857330322, "logps/rejected": -4.421528339385986, "loss": 0.4981, "nll_loss": 0.48214292526245117, "rewards/accuracies": 1.0, "rewards/chosen": -0.12095224857330322, "rewards/margins": 0.3212006092071533, "rewards/rejected": -0.44215285778045654, "step": 6993 }, { "epoch": 19.148528405201915, "grad_norm": 3.5607872009277344, "learning_rate": 4.191780821917808e-08, "log_odds_chosen": 1.7184358835220337, "log_odds_ratio": -0.19423069059848785, "logits/chosen": 1.0847070217132568, "logits/rejected": 1.0305815935134888, "logps/chosen": -1.443015217781067, "logps/rejected": -2.879751443862915, "loss": 0.5349, "nll_loss": 0.5155249238014221, "rewards/accuracies": 1.0, "rewards/chosen": -0.14430151879787445, "rewards/margins": 0.1436736285686493, "rewards/rejected": -0.28797516226768494, "step": 6994 }, { "epoch": 19.15126625598905, "grad_norm": 7.221524715423584, "learning_rate": 4.178082191780822e-08, "log_odds_chosen": 1.8307101726531982, "log_odds_ratio": -0.23411425948143005, "logits/chosen": 0.8763201832771301, "logits/rejected": 0.8062106370925903, "logps/chosen": -1.8269786834716797, "logps/rejected": -3.493941068649292, "loss": 0.555, "nll_loss": 0.5316298604011536, "rewards/accuracies": 1.0, "rewards/chosen": -0.1826978623867035, "rewards/margins": 0.166696235537529, "rewards/rejected": -0.34939414262771606, "step": 6995 }, { "epoch": 19.15400410677618, "grad_norm": 5.313856601715088, "learning_rate": 4.1643835616438355e-08, "log_odds_chosen": 2.83803129196167, "log_odds_ratio": -0.19127658009529114, "logits/chosen": 1.0792717933654785, "logits/rejected": 1.094003438949585, "logps/chosen": -2.9290683269500732, "logps/rejected": -5.609026908874512, "loss": 0.6087, "nll_loss": 0.5896214246749878, "rewards/accuracies": 1.0, "rewards/chosen": -0.29290682077407837, "rewards/margins": 0.2679958641529083, "rewards/rejected": -0.5609027147293091, "step": 6996 }, { "epoch": 19.156741957563312, "grad_norm": 9.842323303222656, "learning_rate": 4.150684931506849e-08, "log_odds_chosen": 2.1967122554779053, "log_odds_ratio": -0.7102373242378235, "logits/chosen": 1.1204354763031006, "logits/rejected": 1.1327571868896484, "logps/chosen": -2.674473762512207, "logps/rejected": -4.760274887084961, "loss": 0.556, "nll_loss": 0.4849451780319214, "rewards/accuracies": 0.875, "rewards/chosen": -0.2674473822116852, "rewards/margins": 0.2085801064968109, "rewards/rejected": -0.4760275185108185, "step": 6997 }, { "epoch": 19.159479808350444, "grad_norm": 8.664446830749512, "learning_rate": 4.136986301369863e-08, "log_odds_chosen": 3.4262123107910156, "log_odds_ratio": -0.23574796319007874, "logits/chosen": 1.008055329322815, "logits/rejected": 0.9689685106277466, "logps/chosen": -2.0255205631256104, "logps/rejected": -5.185604572296143, "loss": 0.5759, "nll_loss": 0.5523304343223572, "rewards/accuracies": 0.875, "rewards/chosen": -0.20255205035209656, "rewards/margins": 0.31600841879844666, "rewards/rejected": -0.5185604691505432, "step": 6998 }, { "epoch": 19.16221765913758, "grad_norm": 4.272807598114014, "learning_rate": 4.1232876712328764e-08, "log_odds_chosen": 3.3241565227508545, "log_odds_ratio": -0.17354054749011993, "logits/chosen": 1.1991584300994873, "logits/rejected": 1.1497377157211304, "logps/chosen": -2.086604356765747, "logps/rejected": -5.228226661682129, "loss": 0.617, "nll_loss": 0.5996721982955933, "rewards/accuracies": 1.0, "rewards/chosen": -0.20866043865680695, "rewards/margins": 0.3141621947288513, "rewards/rejected": -0.5228226184844971, "step": 6999 }, { "epoch": 19.16495550992471, "grad_norm": 4.22611665725708, "learning_rate": 4.10958904109589e-08, "log_odds_chosen": 2.491077184677124, "log_odds_ratio": -0.40423527359962463, "logits/chosen": 1.2218689918518066, "logits/rejected": 1.1820564270019531, "logps/chosen": -1.911881923675537, "logps/rejected": -4.292091369628906, "loss": 0.5059, "nll_loss": 0.4654555916786194, "rewards/accuracies": 0.875, "rewards/chosen": -0.19118818640708923, "rewards/margins": 0.23802092671394348, "rewards/rejected": -0.4292091131210327, "step": 7000 }, { "epoch": 19.16769336071184, "grad_norm": 4.260663032531738, "learning_rate": 4.095890410958904e-08, "log_odds_chosen": 2.4093618392944336, "log_odds_ratio": -0.233270525932312, "logits/chosen": 1.0662075281143188, "logits/rejected": 1.1000372171401978, "logps/chosen": -2.3053746223449707, "logps/rejected": -4.604426383972168, "loss": 0.5414, "nll_loss": 0.5181159377098083, "rewards/accuracies": 1.0, "rewards/chosen": -0.23053747415542603, "rewards/margins": 0.2299051582813263, "rewards/rejected": -0.46044260263442993, "step": 7001 }, { "epoch": 19.170431211498972, "grad_norm": 3.665588617324829, "learning_rate": 4.0821917808219174e-08, "log_odds_chosen": 2.7785658836364746, "log_odds_ratio": -0.16570837795734406, "logits/chosen": 1.0702693462371826, "logits/rejected": 1.0659750699996948, "logps/chosen": -2.020453691482544, "logps/rejected": -4.633153438568115, "loss": 0.5564, "nll_loss": 0.5398144125938416, "rewards/accuracies": 1.0, "rewards/chosen": -0.20204535126686096, "rewards/margins": 0.26127001643180847, "rewards/rejected": -0.46331536769866943, "step": 7002 }, { "epoch": 19.173169062286107, "grad_norm": 4.065334320068359, "learning_rate": 4.068493150684931e-08, "log_odds_chosen": 3.0877931118011475, "log_odds_ratio": -0.1699984073638916, "logits/chosen": 1.2134768962860107, "logits/rejected": 1.1856813430786133, "logps/chosen": -1.3511868715286255, "logps/rejected": -4.158673286437988, "loss": 0.4188, "nll_loss": 0.401838093996048, "rewards/accuracies": 1.0, "rewards/chosen": -0.13511867821216583, "rewards/margins": 0.2807486355304718, "rewards/rejected": -0.41586732864379883, "step": 7003 }, { "epoch": 19.175906913073238, "grad_norm": 3.5387022495269775, "learning_rate": 4.054794520547945e-08, "log_odds_chosen": 2.2658982276916504, "log_odds_ratio": -0.21941736340522766, "logits/chosen": 1.204604983329773, "logits/rejected": 1.211704969406128, "logps/chosen": -1.609974980354309, "logps/rejected": -3.6998820304870605, "loss": 0.4424, "nll_loss": 0.4204111695289612, "rewards/accuracies": 1.0, "rewards/chosen": -0.16099750995635986, "rewards/margins": 0.20899072289466858, "rewards/rejected": -0.36998820304870605, "step": 7004 }, { "epoch": 19.17864476386037, "grad_norm": 7.489712715148926, "learning_rate": 4.041095890410959e-08, "log_odds_chosen": 2.7032785415649414, "log_odds_ratio": -0.22228728234767914, "logits/chosen": 0.9365188479423523, "logits/rejected": 0.9289105534553528, "logps/chosen": -2.4646401405334473, "logps/rejected": -5.029839515686035, "loss": 0.6929, "nll_loss": 0.6706414818763733, "rewards/accuracies": 1.0, "rewards/chosen": -0.24646399915218353, "rewards/margins": 0.25651997327804565, "rewards/rejected": -0.5029839873313904, "step": 7005 }, { "epoch": 19.1813826146475, "grad_norm": 4.427152156829834, "learning_rate": 4.027397260273972e-08, "log_odds_chosen": 1.832068920135498, "log_odds_ratio": -0.27723804116249084, "logits/chosen": 1.319887399673462, "logits/rejected": 1.304851770401001, "logps/chosen": -1.728975534439087, "logps/rejected": -3.2180464267730713, "loss": 0.4714, "nll_loss": 0.44368207454681396, "rewards/accuracies": 0.875, "rewards/chosen": -0.1728975623846054, "rewards/margins": 0.14890706539154053, "rewards/rejected": -0.32180464267730713, "step": 7006 }, { "epoch": 19.184120465434635, "grad_norm": 4.052349090576172, "learning_rate": 4.013698630136986e-08, "log_odds_chosen": 1.9549508094787598, "log_odds_ratio": -0.2693084478378296, "logits/chosen": 1.0181642770767212, "logits/rejected": 1.0640549659729004, "logps/chosen": -2.347921371459961, "logps/rejected": -4.222370147705078, "loss": 0.6233, "nll_loss": 0.5963386297225952, "rewards/accuracies": 1.0, "rewards/chosen": -0.23479214310646057, "rewards/margins": 0.18744486570358276, "rewards/rejected": -0.42223700881004333, "step": 7007 }, { "epoch": 19.186858316221766, "grad_norm": 3.932241439819336, "learning_rate": 4e-08, "log_odds_chosen": 0.8587146997451782, "log_odds_ratio": -0.43710869550704956, "logits/chosen": 1.2244772911071777, "logits/rejected": 1.1785359382629395, "logps/chosen": -1.325512409210205, "logps/rejected": -2.062657356262207, "loss": 0.4787, "nll_loss": 0.43499717116355896, "rewards/accuracies": 0.875, "rewards/chosen": -0.13255123794078827, "rewards/margins": 0.07371451705694199, "rewards/rejected": -0.20626574754714966, "step": 7008 }, { "epoch": 19.189596167008897, "grad_norm": 4.415591716766357, "learning_rate": 3.986301369863013e-08, "log_odds_chosen": 2.0845046043395996, "log_odds_ratio": -0.3615085184574127, "logits/chosen": 1.1749824285507202, "logits/rejected": 1.2263141870498657, "logps/chosen": -2.6245689392089844, "logps/rejected": -4.630255222320557, "loss": 0.6551, "nll_loss": 0.6189443469047546, "rewards/accuracies": 0.875, "rewards/chosen": -0.26245689392089844, "rewards/margins": 0.20056863129138947, "rewards/rejected": -0.4630255401134491, "step": 7009 }, { "epoch": 19.19233401779603, "grad_norm": 3.1226797103881836, "learning_rate": 3.972602739726028e-08, "log_odds_chosen": 5.6961259841918945, "log_odds_ratio": -0.0815172791481018, "logits/chosen": 1.1038005352020264, "logits/rejected": 1.128003478050232, "logps/chosen": -1.7809771299362183, "logps/rejected": -7.273922920227051, "loss": 0.5508, "nll_loss": 0.5426501631736755, "rewards/accuracies": 1.0, "rewards/chosen": -0.17809772491455078, "rewards/margins": 0.549294650554657, "rewards/rejected": -0.727392315864563, "step": 7010 }, { "epoch": 19.195071868583163, "grad_norm": 3.6332309246063232, "learning_rate": 3.958904109589041e-08, "log_odds_chosen": 1.8415743112564087, "log_odds_ratio": -0.3617628514766693, "logits/chosen": 1.1228944063186646, "logits/rejected": 1.109073519706726, "logps/chosen": -1.6116039752960205, "logps/rejected": -3.319662094116211, "loss": 0.4934, "nll_loss": 0.45722436904907227, "rewards/accuracies": 0.75, "rewards/chosen": -0.161160409450531, "rewards/margins": 0.17080578207969666, "rewards/rejected": -0.3319661617279053, "step": 7011 }, { "epoch": 19.197809719370294, "grad_norm": 3.1106791496276855, "learning_rate": 3.945205479452054e-08, "log_odds_chosen": 3.7383012771606445, "log_odds_ratio": -0.1381990909576416, "logits/chosen": 1.2629547119140625, "logits/rejected": 1.2515537738800049, "logps/chosen": -1.2894916534423828, "logps/rejected": -4.611884593963623, "loss": 0.4814, "nll_loss": 0.4675840139389038, "rewards/accuracies": 1.0, "rewards/chosen": -0.12894916534423828, "rewards/margins": 0.3322393298149109, "rewards/rejected": -0.46118852496147156, "step": 7012 }, { "epoch": 19.200547570157426, "grad_norm": 6.014978885650635, "learning_rate": 3.931506849315068e-08, "log_odds_chosen": 1.0017820596694946, "log_odds_ratio": -0.6097390651702881, "logits/chosen": 0.6999231576919556, "logits/rejected": 0.7180201411247253, "logps/chosen": -2.3564066886901855, "logps/rejected": -3.2775912284851074, "loss": 0.5554, "nll_loss": 0.49438995122909546, "rewards/accuracies": 0.875, "rewards/chosen": -0.23564067482948303, "rewards/margins": 0.09211844205856323, "rewards/rejected": -0.32775911688804626, "step": 7013 }, { "epoch": 19.20328542094456, "grad_norm": 2.9896435737609863, "learning_rate": 3.917808219178082e-08, "log_odds_chosen": 2.7076616287231445, "log_odds_ratio": -0.14262787997722626, "logits/chosen": 1.0095202922821045, "logits/rejected": 1.0079978704452515, "logps/chosen": -1.6148145198822021, "logps/rejected": -4.101181983947754, "loss": 0.4375, "nll_loss": 0.4232634902000427, "rewards/accuracies": 1.0, "rewards/chosen": -0.16148145496845245, "rewards/margins": 0.24863673746585846, "rewards/rejected": -0.4101181924343109, "step": 7014 }, { "epoch": 19.20602327173169, "grad_norm": 3.4304938316345215, "learning_rate": 3.904109589041096e-08, "log_odds_chosen": 2.4740800857543945, "log_odds_ratio": -0.1938157081604004, "logits/chosen": 1.2638593912124634, "logits/rejected": 1.186335563659668, "logps/chosen": -1.8815093040466309, "logps/rejected": -4.149644374847412, "loss": 0.5839, "nll_loss": 0.5645601153373718, "rewards/accuracies": 1.0, "rewards/chosen": -0.18815094232559204, "rewards/margins": 0.22681351006031036, "rewards/rejected": -0.4149644374847412, "step": 7015 }, { "epoch": 19.208761122518823, "grad_norm": 3.4247498512268066, "learning_rate": 3.890410958904109e-08, "log_odds_chosen": 3.723813056945801, "log_odds_ratio": -0.1232602447271347, "logits/chosen": 1.2880783081054688, "logits/rejected": 1.2479554414749146, "logps/chosen": -1.5205966234207153, "logps/rejected": -5.00563383102417, "loss": 0.5819, "nll_loss": 0.5696053504943848, "rewards/accuracies": 1.0, "rewards/chosen": -0.1520596742630005, "rewards/margins": 0.3485037088394165, "rewards/rejected": -0.500563383102417, "step": 7016 }, { "epoch": 19.211498973305954, "grad_norm": 8.27557373046875, "learning_rate": 3.876712328767123e-08, "log_odds_chosen": 2.8079755306243896, "log_odds_ratio": -0.27186042070388794, "logits/chosen": 1.2391362190246582, "logits/rejected": 1.17767333984375, "logps/chosen": -1.5409650802612305, "logps/rejected": -4.040401458740234, "loss": 0.4916, "nll_loss": 0.4643867611885071, "rewards/accuracies": 0.875, "rewards/chosen": -0.15409649908542633, "rewards/margins": 0.24994365870952606, "rewards/rejected": -0.40404012799263, "step": 7017 }, { "epoch": 19.21423682409309, "grad_norm": 3.61580228805542, "learning_rate": 3.863013698630137e-08, "log_odds_chosen": 3.4705796241760254, "log_odds_ratio": -0.17562150955200195, "logits/chosen": 0.9259873032569885, "logits/rejected": 0.8996164798736572, "logps/chosen": -1.3914039134979248, "logps/rejected": -4.620906829833984, "loss": 0.4147, "nll_loss": 0.39712420105934143, "rewards/accuracies": 1.0, "rewards/chosen": -0.13914039731025696, "rewards/margins": 0.3229503035545349, "rewards/rejected": -0.46209070086479187, "step": 7018 }, { "epoch": 19.21697467488022, "grad_norm": 5.905646324157715, "learning_rate": 3.84931506849315e-08, "log_odds_chosen": 1.3741052150726318, "log_odds_ratio": -0.47538256645202637, "logits/chosen": 0.9316767454147339, "logits/rejected": 0.9439460039138794, "logps/chosen": -2.3472259044647217, "logps/rejected": -3.59395170211792, "loss": 0.5237, "nll_loss": 0.47613102197647095, "rewards/accuracies": 0.75, "rewards/chosen": -0.23472259938716888, "rewards/margins": 0.12467256188392639, "rewards/rejected": -0.3593951463699341, "step": 7019 }, { "epoch": 19.21971252566735, "grad_norm": 4.526071071624756, "learning_rate": 3.835616438356165e-08, "log_odds_chosen": 0.8020736575126648, "log_odds_ratio": -0.5458673238754272, "logits/chosen": 1.1505206823349, "logits/rejected": 1.0542261600494385, "logps/chosen": -1.6399517059326172, "logps/rejected": -2.2475061416625977, "loss": 0.4981, "nll_loss": 0.4435573220252991, "rewards/accuracies": 0.875, "rewards/chosen": -0.163995161652565, "rewards/margins": 0.06075545400381088, "rewards/rejected": -0.22475063800811768, "step": 7020 }, { "epoch": 19.222450376454482, "grad_norm": 4.110603332519531, "learning_rate": 3.821917808219178e-08, "log_odds_chosen": 4.500519275665283, "log_odds_ratio": -0.2987815737724304, "logits/chosen": 0.9190486073493958, "logits/rejected": 0.9244673252105713, "logps/chosen": -1.9304344654083252, "logps/rejected": -6.274400234222412, "loss": 0.6549, "nll_loss": 0.6250705718994141, "rewards/accuracies": 0.875, "rewards/chosen": -0.19304345548152924, "rewards/margins": 0.43439656496047974, "rewards/rejected": -0.6274400353431702, "step": 7021 }, { "epoch": 19.225188227241617, "grad_norm": 4.155876636505127, "learning_rate": 3.808219178082191e-08, "log_odds_chosen": 2.6310131549835205, "log_odds_ratio": -0.29358580708503723, "logits/chosen": 1.2881343364715576, "logits/rejected": 1.271064043045044, "logps/chosen": -1.5339510440826416, "logps/rejected": -4.010162353515625, "loss": 0.5487, "nll_loss": 0.5193839073181152, "rewards/accuracies": 1.0, "rewards/chosen": -0.15339508652687073, "rewards/margins": 0.24762111902236938, "rewards/rejected": -0.4010162353515625, "step": 7022 }, { "epoch": 19.227926078028748, "grad_norm": 3.4581706523895264, "learning_rate": 3.794520547945206e-08, "log_odds_chosen": 2.8053884506225586, "log_odds_ratio": -0.20747452974319458, "logits/chosen": 1.2283384799957275, "logits/rejected": 1.1756975650787354, "logps/chosen": -1.728731632232666, "logps/rejected": -4.332647800445557, "loss": 0.5269, "nll_loss": 0.5061972141265869, "rewards/accuracies": 1.0, "rewards/chosen": -0.17287318408489227, "rewards/margins": 0.26039162278175354, "rewards/rejected": -0.4332647919654846, "step": 7023 }, { "epoch": 19.23066392881588, "grad_norm": 3.9779460430145264, "learning_rate": 3.780821917808219e-08, "log_odds_chosen": 2.1654019355773926, "log_odds_ratio": -0.24468529224395752, "logits/chosen": 1.1400808095932007, "logits/rejected": 1.1159347295761108, "logps/chosen": -1.4544117450714111, "logps/rejected": -3.387361526489258, "loss": 0.4843, "nll_loss": 0.45986396074295044, "rewards/accuracies": 1.0, "rewards/chosen": -0.1454411745071411, "rewards/margins": 0.19329500198364258, "rewards/rejected": -0.3387361466884613, "step": 7024 }, { "epoch": 19.23340177960301, "grad_norm": 3.808471918106079, "learning_rate": 3.767123287671233e-08, "log_odds_chosen": 2.784886360168457, "log_odds_ratio": -0.19435381889343262, "logits/chosen": 1.1122400760650635, "logits/rejected": 1.0703628063201904, "logps/chosen": -1.7498035430908203, "logps/rejected": -4.3367204666137695, "loss": 0.4705, "nll_loss": 0.45103368163108826, "rewards/accuracies": 1.0, "rewards/chosen": -0.17498035728931427, "rewards/margins": 0.2586917281150818, "rewards/rejected": -0.43367207050323486, "step": 7025 }, { "epoch": 19.236139630390145, "grad_norm": 4.342434883117676, "learning_rate": 3.753424657534247e-08, "log_odds_chosen": 1.9774123430252075, "log_odds_ratio": -0.2901840806007385, "logits/chosen": 1.2146131992340088, "logits/rejected": 1.3322499990463257, "logps/chosen": -1.9754793643951416, "logps/rejected": -3.755504608154297, "loss": 0.5789, "nll_loss": 0.5499004125595093, "rewards/accuracies": 1.0, "rewards/chosen": -0.19754794239997864, "rewards/margins": 0.17800253629684448, "rewards/rejected": -0.3755504786968231, "step": 7026 }, { "epoch": 19.238877481177276, "grad_norm": 3.2768006324768066, "learning_rate": 3.73972602739726e-08, "log_odds_chosen": 3.0128159523010254, "log_odds_ratio": -0.17353735864162445, "logits/chosen": 1.1525999307632446, "logits/rejected": 1.1017986536026, "logps/chosen": -1.4448773860931396, "logps/rejected": -4.195179462432861, "loss": 0.5396, "nll_loss": 0.5222327709197998, "rewards/accuracies": 1.0, "rewards/chosen": -0.14448773860931396, "rewards/margins": 0.2750301957130432, "rewards/rejected": -0.4195179343223572, "step": 7027 }, { "epoch": 19.241615331964407, "grad_norm": 3.4981679916381836, "learning_rate": 3.726027397260274e-08, "log_odds_chosen": 4.040793418884277, "log_odds_ratio": -0.11968246102333069, "logits/chosen": 0.9200698137283325, "logits/rejected": 0.9643220901489258, "logps/chosen": -1.5275236368179321, "logps/rejected": -5.34276008605957, "loss": 0.5329, "nll_loss": 0.520976722240448, "rewards/accuracies": 1.0, "rewards/chosen": -0.1527523696422577, "rewards/margins": 0.38152366876602173, "rewards/rejected": -0.534276008605957, "step": 7028 }, { "epoch": 19.24435318275154, "grad_norm": 4.528743267059326, "learning_rate": 3.712328767123288e-08, "log_odds_chosen": 3.417072057723999, "log_odds_ratio": -0.1698193997144699, "logits/chosen": 1.2072607278823853, "logits/rejected": 1.1618205308914185, "logps/chosen": -1.8657643795013428, "logps/rejected": -5.075807571411133, "loss": 0.5365, "nll_loss": 0.5195232629776001, "rewards/accuracies": 1.0, "rewards/chosen": -0.18657642602920532, "rewards/margins": 0.32100433111190796, "rewards/rejected": -0.5075807571411133, "step": 7029 }, { "epoch": 19.247091033538673, "grad_norm": 6.058062553405762, "learning_rate": 3.6986301369863016e-08, "log_odds_chosen": 0.829367995262146, "log_odds_ratio": -0.5250012874603271, "logits/chosen": 1.0804710388183594, "logits/rejected": 1.085587978363037, "logps/chosen": -2.177945613861084, "logps/rejected": -2.9273438453674316, "loss": 0.5694, "nll_loss": 0.5168591737747192, "rewards/accuracies": 0.875, "rewards/chosen": -0.21779458224773407, "rewards/margins": 0.0749397948384285, "rewards/rejected": -0.29273438453674316, "step": 7030 }, { "epoch": 19.249828884325805, "grad_norm": 5.264194965362549, "learning_rate": 3.684931506849315e-08, "log_odds_chosen": 5.30156946182251, "log_odds_ratio": -0.18836992979049683, "logits/chosen": 1.2080943584442139, "logits/rejected": 1.2786117792129517, "logps/chosen": -2.359053611755371, "logps/rejected": -7.505290985107422, "loss": 0.571, "nll_loss": 0.552179217338562, "rewards/accuracies": 0.875, "rewards/chosen": -0.23590536415576935, "rewards/margins": 0.5146237015724182, "rewards/rejected": -0.7505290508270264, "step": 7031 }, { "epoch": 19.252566735112936, "grad_norm": 3.677513360977173, "learning_rate": 3.671232876712329e-08, "log_odds_chosen": 1.9507216215133667, "log_odds_ratio": -0.19601961970329285, "logits/chosen": 0.9805101752281189, "logits/rejected": 0.9351593852043152, "logps/chosen": -1.39735746383667, "logps/rejected": -3.06905460357666, "loss": 0.3934, "nll_loss": 0.37377530336380005, "rewards/accuracies": 1.0, "rewards/chosen": -0.13973575830459595, "rewards/margins": 0.1671697199344635, "rewards/rejected": -0.30690547823905945, "step": 7032 }, { "epoch": 19.255304585900067, "grad_norm": 6.1905293464660645, "learning_rate": 3.6575342465753426e-08, "log_odds_chosen": 1.448830008506775, "log_odds_ratio": -0.3821831941604614, "logits/chosen": 0.9659384489059448, "logits/rejected": 0.8623178005218506, "logps/chosen": -1.950897216796875, "logps/rejected": -3.2602100372314453, "loss": 0.5364, "nll_loss": 0.4981729984283447, "rewards/accuracies": 0.875, "rewards/chosen": -0.19508972764015198, "rewards/margins": 0.1309312880039215, "rewards/rejected": -0.3260210156440735, "step": 7033 }, { "epoch": 19.2580424366872, "grad_norm": 6.568496227264404, "learning_rate": 3.643835616438356e-08, "log_odds_chosen": 3.488499641418457, "log_odds_ratio": -0.24019622802734375, "logits/chosen": 1.191693663597107, "logits/rejected": 1.2180447578430176, "logps/chosen": -1.7639546394348145, "logps/rejected": -5.082351207733154, "loss": 0.5555, "nll_loss": 0.531485378742218, "rewards/accuracies": 0.875, "rewards/chosen": -0.1763954758644104, "rewards/margins": 0.3318396210670471, "rewards/rejected": -0.5082350969314575, "step": 7034 }, { "epoch": 19.260780287474333, "grad_norm": 4.666140079498291, "learning_rate": 3.6301369863013697e-08, "log_odds_chosen": 3.1187870502471924, "log_odds_ratio": -0.1927325576543808, "logits/chosen": 0.9378052353858948, "logits/rejected": 0.9698043465614319, "logps/chosen": -2.0348711013793945, "logps/rejected": -4.977125644683838, "loss": 0.7152, "nll_loss": 0.6959167122840881, "rewards/accuracies": 1.0, "rewards/chosen": -0.2034870982170105, "rewards/margins": 0.29422545433044434, "rewards/rejected": -0.49771255254745483, "step": 7035 }, { "epoch": 19.263518138261464, "grad_norm": 3.9717326164245605, "learning_rate": 3.6164383561643835e-08, "log_odds_chosen": 1.6406110525131226, "log_odds_ratio": -0.3317156434059143, "logits/chosen": 1.0097981691360474, "logits/rejected": 1.058736801147461, "logps/chosen": -1.9032961130142212, "logps/rejected": -3.4333534240722656, "loss": 0.5254, "nll_loss": 0.49221271276474, "rewards/accuracies": 0.875, "rewards/chosen": -0.1903296262025833, "rewards/margins": 0.1530057191848755, "rewards/rejected": -0.3433353304862976, "step": 7036 }, { "epoch": 19.266255989048595, "grad_norm": 4.474832534790039, "learning_rate": 3.602739726027397e-08, "log_odds_chosen": 3.6181581020355225, "log_odds_ratio": -0.11814256757497787, "logits/chosen": 1.1955139636993408, "logits/rejected": 1.2726037502288818, "logps/chosen": -2.06247878074646, "logps/rejected": -5.495573997497559, "loss": 0.6365, "nll_loss": 0.6247011423110962, "rewards/accuracies": 1.0, "rewards/chosen": -0.20624789595603943, "rewards/margins": 0.3433094918727875, "rewards/rejected": -0.5495573878288269, "step": 7037 }, { "epoch": 19.26899383983573, "grad_norm": 3.723696231842041, "learning_rate": 3.5890410958904107e-08, "log_odds_chosen": 2.2499241828918457, "log_odds_ratio": -0.19177120923995972, "logits/chosen": 0.8164312243461609, "logits/rejected": 0.8452358245849609, "logps/chosen": -2.282348871231079, "logps/rejected": -4.3938069343566895, "loss": 0.5538, "nll_loss": 0.5346131324768066, "rewards/accuracies": 1.0, "rewards/chosen": -0.2282349020242691, "rewards/margins": 0.21114583313465118, "rewards/rejected": -0.4393807053565979, "step": 7038 }, { "epoch": 19.27173169062286, "grad_norm": 4.229410171508789, "learning_rate": 3.5753424657534245e-08, "log_odds_chosen": 1.3188934326171875, "log_odds_ratio": -0.3841773569583893, "logits/chosen": 0.9159209728240967, "logits/rejected": 0.9533445835113525, "logps/chosen": -2.1793341636657715, "logps/rejected": -3.403512954711914, "loss": 0.5622, "nll_loss": 0.5237367749214172, "rewards/accuracies": 0.75, "rewards/chosen": -0.21793340146541595, "rewards/margins": 0.12241790443658829, "rewards/rejected": -0.3403513431549072, "step": 7039 }, { "epoch": 19.274469541409992, "grad_norm": 3.0039753913879395, "learning_rate": 3.5616438356164384e-08, "log_odds_chosen": 2.963562488555908, "log_odds_ratio": -0.1358374059200287, "logits/chosen": 1.2978904247283936, "logits/rejected": 1.2715238332748413, "logps/chosen": -1.6729400157928467, "logps/rejected": -4.443563938140869, "loss": 0.5052, "nll_loss": 0.4915950298309326, "rewards/accuracies": 1.0, "rewards/chosen": -0.16729401051998138, "rewards/margins": 0.27706241607666016, "rewards/rejected": -0.44435641169548035, "step": 7040 }, { "epoch": 19.277207392197127, "grad_norm": 3.4325950145721436, "learning_rate": 3.5479452054794516e-08, "log_odds_chosen": 3.567963123321533, "log_odds_ratio": -0.07819793373346329, "logits/chosen": 0.8901253342628479, "logits/rejected": 0.9044824838638306, "logps/chosen": -1.6538172960281372, "logps/rejected": -4.90052604675293, "loss": 0.46, "nll_loss": 0.45215022563934326, "rewards/accuracies": 1.0, "rewards/chosen": -0.16538172960281372, "rewards/margins": 0.3246708810329437, "rewards/rejected": -0.49005258083343506, "step": 7041 }, { "epoch": 19.279945242984258, "grad_norm": 3.4130070209503174, "learning_rate": 3.5342465753424655e-08, "log_odds_chosen": 2.724297046661377, "log_odds_ratio": -0.196446493268013, "logits/chosen": 1.0310519933700562, "logits/rejected": 1.0322015285491943, "logps/chosen": -1.3197051286697388, "logps/rejected": -3.8143486976623535, "loss": 0.4551, "nll_loss": 0.4354931712150574, "rewards/accuracies": 1.0, "rewards/chosen": -0.13197052478790283, "rewards/margins": 0.24946437776088715, "rewards/rejected": -0.3814348578453064, "step": 7042 }, { "epoch": 19.28268309377139, "grad_norm": 3.824537515640259, "learning_rate": 3.5205479452054794e-08, "log_odds_chosen": 2.8019134998321533, "log_odds_ratio": -0.1280023753643036, "logits/chosen": 0.9610264301300049, "logits/rejected": 0.9746736288070679, "logps/chosen": -1.677138328552246, "logps/rejected": -4.287847518920898, "loss": 0.5883, "nll_loss": 0.575529158115387, "rewards/accuracies": 1.0, "rewards/chosen": -0.16771383583545685, "rewards/margins": 0.2610709071159363, "rewards/rejected": -0.42878472805023193, "step": 7043 }, { "epoch": 19.28542094455852, "grad_norm": 5.978147983551025, "learning_rate": 3.5068493150684926e-08, "log_odds_chosen": 1.619442343711853, "log_odds_ratio": -0.40500253438949585, "logits/chosen": 1.288596510887146, "logits/rejected": 1.232517957687378, "logps/chosen": -1.9113359451293945, "logps/rejected": -3.345299243927002, "loss": 0.482, "nll_loss": 0.4415327310562134, "rewards/accuracies": 0.75, "rewards/chosen": -0.19113360345363617, "rewards/margins": 0.14339634776115417, "rewards/rejected": -0.33452993631362915, "step": 7044 }, { "epoch": 19.288158795345655, "grad_norm": 8.955878257751465, "learning_rate": 3.493150684931507e-08, "log_odds_chosen": 2.9688100814819336, "log_odds_ratio": -0.5074055790901184, "logits/chosen": 1.1502411365509033, "logits/rejected": 1.1696417331695557, "logps/chosen": -2.32773494720459, "logps/rejected": -5.048658847808838, "loss": 0.6782, "nll_loss": 0.6275007724761963, "rewards/accuracies": 0.875, "rewards/chosen": -0.23277351260185242, "rewards/margins": 0.27209237217903137, "rewards/rejected": -0.5048658847808838, "step": 7045 }, { "epoch": 19.290896646132786, "grad_norm": 3.5550546646118164, "learning_rate": 3.4794520547945204e-08, "log_odds_chosen": 3.54591965675354, "log_odds_ratio": -0.1817038506269455, "logits/chosen": 1.2517787218093872, "logits/rejected": 1.1571520566940308, "logps/chosen": -1.5127484798431396, "logps/rejected": -4.832624912261963, "loss": 0.481, "nll_loss": 0.4627830684185028, "rewards/accuracies": 1.0, "rewards/chosen": -0.15127485990524292, "rewards/margins": 0.3319876492023468, "rewards/rejected": -0.4832625091075897, "step": 7046 }, { "epoch": 19.293634496919918, "grad_norm": 3.650416135787964, "learning_rate": 3.4657534246575336e-08, "log_odds_chosen": 2.6349024772644043, "log_odds_ratio": -0.20698793232440948, "logits/chosen": 0.9817010760307312, "logits/rejected": 0.9734044075012207, "logps/chosen": -1.7574989795684814, "logps/rejected": -4.155251502990723, "loss": 0.5346, "nll_loss": 0.5139296054840088, "rewards/accuracies": 0.875, "rewards/chosen": -0.17574989795684814, "rewards/margins": 0.23977524042129517, "rewards/rejected": -0.4155251383781433, "step": 7047 }, { "epoch": 19.29637234770705, "grad_norm": 3.6380162239074707, "learning_rate": 3.452054794520548e-08, "log_odds_chosen": 2.9356415271759033, "log_odds_ratio": -0.2255416363477707, "logits/chosen": 1.0083602666854858, "logits/rejected": 0.861763596534729, "logps/chosen": -1.3390157222747803, "logps/rejected": -4.05646276473999, "loss": 0.5357, "nll_loss": 0.5131239295005798, "rewards/accuracies": 1.0, "rewards/chosen": -0.13390156626701355, "rewards/margins": 0.27174466848373413, "rewards/rejected": -0.40564626455307007, "step": 7048 }, { "epoch": 19.299110198494184, "grad_norm": 4.214555740356445, "learning_rate": 3.4383561643835614e-08, "log_odds_chosen": 3.2675909996032715, "log_odds_ratio": -0.2568736672401428, "logits/chosen": 0.936863124370575, "logits/rejected": 0.9651464223861694, "logps/chosen": -2.4781389236450195, "logps/rejected": -5.64503288269043, "loss": 0.5964, "nll_loss": 0.5706874132156372, "rewards/accuracies": 0.875, "rewards/chosen": -0.24781391024589539, "rewards/margins": 0.3166894316673279, "rewards/rejected": -0.5645033121109009, "step": 7049 }, { "epoch": 19.301848049281315, "grad_norm": 3.801065683364868, "learning_rate": 3.424657534246575e-08, "log_odds_chosen": 1.7048546075820923, "log_odds_ratio": -0.2195688635110855, "logits/chosen": 0.9600174427032471, "logits/rejected": 0.9156308174133301, "logps/chosen": -1.742379903793335, "logps/rejected": -3.2725651264190674, "loss": 0.5023, "nll_loss": 0.48039135336875916, "rewards/accuracies": 1.0, "rewards/chosen": -0.17423799633979797, "rewards/margins": 0.153018519282341, "rewards/rejected": -0.3272565007209778, "step": 7050 }, { "epoch": 19.304585900068446, "grad_norm": 6.726372718811035, "learning_rate": 3.410958904109589e-08, "log_odds_chosen": 3.295921564102173, "log_odds_ratio": -0.09666454792022705, "logits/chosen": 1.254784107208252, "logits/rejected": 1.3123180866241455, "logps/chosen": -2.2949893474578857, "logps/rejected": -5.378806114196777, "loss": 0.5572, "nll_loss": 0.5475482940673828, "rewards/accuracies": 1.0, "rewards/chosen": -0.2294989377260208, "rewards/margins": 0.30838167667388916, "rewards/rejected": -0.5378805994987488, "step": 7051 }, { "epoch": 19.307323750855577, "grad_norm": 3.196484327316284, "learning_rate": 3.3972602739726024e-08, "log_odds_chosen": 3.754042863845825, "log_odds_ratio": -0.11073232442140579, "logits/chosen": 1.0389527082443237, "logits/rejected": 1.0923047065734863, "logps/chosen": -1.5897109508514404, "logps/rejected": -5.127732276916504, "loss": 0.468, "nll_loss": 0.45692723989486694, "rewards/accuracies": 1.0, "rewards/chosen": -0.15897108614444733, "rewards/margins": 0.3538021445274353, "rewards/rejected": -0.5127732157707214, "step": 7052 }, { "epoch": 19.310061601642712, "grad_norm": 4.018478870391846, "learning_rate": 3.383561643835616e-08, "log_odds_chosen": 3.064607620239258, "log_odds_ratio": -0.09370840340852737, "logits/chosen": 0.9838518500328064, "logits/rejected": 1.0021629333496094, "logps/chosen": -2.066941499710083, "logps/rejected": -4.965666770935059, "loss": 0.5069, "nll_loss": 0.49754828214645386, "rewards/accuracies": 1.0, "rewards/chosen": -0.20669415593147278, "rewards/margins": 0.28987252712249756, "rewards/rejected": -0.49656668305397034, "step": 7053 }, { "epoch": 19.312799452429843, "grad_norm": 3.4888863563537598, "learning_rate": 3.36986301369863e-08, "log_odds_chosen": 4.916504383087158, "log_odds_ratio": -0.08898670971393585, "logits/chosen": 1.005232810974121, "logits/rejected": 1.0011916160583496, "logps/chosen": -1.435286521911621, "logps/rejected": -5.859961032867432, "loss": 0.5258, "nll_loss": 0.5168762803077698, "rewards/accuracies": 1.0, "rewards/chosen": -0.14352864027023315, "rewards/margins": 0.44246745109558105, "rewards/rejected": -0.5859960913658142, "step": 7054 }, { "epoch": 19.315537303216974, "grad_norm": 4.302971839904785, "learning_rate": 3.356164383561644e-08, "log_odds_chosen": 2.189329147338867, "log_odds_ratio": -0.5280895233154297, "logits/chosen": 0.8671730756759644, "logits/rejected": 0.8103440999984741, "logps/chosen": -1.9785103797912598, "logps/rejected": -4.029749870300293, "loss": 0.6035, "nll_loss": 0.5507186055183411, "rewards/accuracies": 0.625, "rewards/chosen": -0.1978510320186615, "rewards/margins": 0.2051239311695099, "rewards/rejected": -0.4029749631881714, "step": 7055 }, { "epoch": 19.318275154004105, "grad_norm": 3.656144857406616, "learning_rate": 3.342465753424657e-08, "log_odds_chosen": 2.3393948078155518, "log_odds_ratio": -0.19703863561153412, "logits/chosen": 1.0083298683166504, "logits/rejected": 0.9709921479225159, "logps/chosen": -1.6803948879241943, "logps/rejected": -3.7807979583740234, "loss": 0.487, "nll_loss": 0.46732884645462036, "rewards/accuracies": 1.0, "rewards/chosen": -0.168039470911026, "rewards/margins": 0.21004031598567963, "rewards/rejected": -0.37807977199554443, "step": 7056 }, { "epoch": 19.32101300479124, "grad_norm": 3.638298749923706, "learning_rate": 3.328767123287671e-08, "log_odds_chosen": 3.319174289703369, "log_odds_ratio": -0.13331599533557892, "logits/chosen": 0.9891880750656128, "logits/rejected": 0.9587979912757874, "logps/chosen": -1.7487300634384155, "logps/rejected": -4.87510871887207, "loss": 0.4458, "nll_loss": 0.4324198365211487, "rewards/accuracies": 1.0, "rewards/chosen": -0.1748729944229126, "rewards/margins": 0.3126378357410431, "rewards/rejected": -0.4875108599662781, "step": 7057 }, { "epoch": 19.32375085557837, "grad_norm": 3.6647863388061523, "learning_rate": 3.315068493150685e-08, "log_odds_chosen": 1.741758108139038, "log_odds_ratio": -0.2883059084415436, "logits/chosen": 1.4113454818725586, "logits/rejected": 1.3628695011138916, "logps/chosen": -1.7938227653503418, "logps/rejected": -3.3703036308288574, "loss": 0.4171, "nll_loss": 0.38822129368782043, "rewards/accuracies": 0.875, "rewards/chosen": -0.17938226461410522, "rewards/margins": 0.15764810144901276, "rewards/rejected": -0.3370303809642792, "step": 7058 }, { "epoch": 19.326488706365502, "grad_norm": 3.8778140544891357, "learning_rate": 3.301369863013698e-08, "log_odds_chosen": 2.364314079284668, "log_odds_ratio": -0.23100188374519348, "logits/chosen": 0.9844301342964172, "logits/rejected": 0.9887270331382751, "logps/chosen": -1.7567998170852661, "logps/rejected": -3.9516592025756836, "loss": 0.4445, "nll_loss": 0.42137444019317627, "rewards/accuracies": 1.0, "rewards/chosen": -0.1756799817085266, "rewards/margins": 0.21948593854904175, "rewards/rejected": -0.39516592025756836, "step": 7059 }, { "epoch": 19.329226557152634, "grad_norm": 3.061154365539551, "learning_rate": 3.287671232876712e-08, "log_odds_chosen": 4.2239089012146, "log_odds_ratio": -0.06341037154197693, "logits/chosen": 1.1009135246276855, "logits/rejected": 1.076797604560852, "logps/chosen": -1.522049069404602, "logps/rejected": -5.482475280761719, "loss": 0.6066, "nll_loss": 0.600211501121521, "rewards/accuracies": 1.0, "rewards/chosen": -0.15220490097999573, "rewards/margins": 0.3960425853729248, "rewards/rejected": -0.5482475161552429, "step": 7060 }, { "epoch": 19.33196440793977, "grad_norm": 5.339170932769775, "learning_rate": 3.273972602739726e-08, "log_odds_chosen": 1.6557579040527344, "log_odds_ratio": -0.318599671125412, "logits/chosen": 0.9447926878929138, "logits/rejected": 0.8817811012268066, "logps/chosen": -1.9947471618652344, "logps/rejected": -3.457752227783203, "loss": 0.5054, "nll_loss": 0.47353655099868774, "rewards/accuracies": 1.0, "rewards/chosen": -0.19947472214698792, "rewards/margins": 0.1463005244731903, "rewards/rejected": -0.34577521681785583, "step": 7061 }, { "epoch": 19.3347022587269, "grad_norm": 4.437682628631592, "learning_rate": 3.260273972602739e-08, "log_odds_chosen": 1.6438970565795898, "log_odds_ratio": -0.26975342631340027, "logits/chosen": 0.7637014985084534, "logits/rejected": 0.7331397533416748, "logps/chosen": -2.0658860206604004, "logps/rejected": -3.5877792835235596, "loss": 0.5337, "nll_loss": 0.5067193508148193, "rewards/accuracies": 1.0, "rewards/chosen": -0.20658859610557556, "rewards/margins": 0.15218931436538696, "rewards/rejected": -0.3587779104709625, "step": 7062 }, { "epoch": 19.33744010951403, "grad_norm": 3.4388654232025146, "learning_rate": 3.246575342465753e-08, "log_odds_chosen": 1.764475703239441, "log_odds_ratio": -0.22434143722057343, "logits/chosen": 1.010685682296753, "logits/rejected": 0.9557527899742126, "logps/chosen": -1.6660070419311523, "logps/rejected": -3.273292303085327, "loss": 0.4503, "nll_loss": 0.4278703033924103, "rewards/accuracies": 1.0, "rewards/chosen": -0.16660070419311523, "rewards/margins": 0.16072852909564972, "rewards/rejected": -0.32732927799224854, "step": 7063 }, { "epoch": 19.340177960301162, "grad_norm": 3.7286343574523926, "learning_rate": 3.232876712328767e-08, "log_odds_chosen": 3.779405117034912, "log_odds_ratio": -0.132030189037323, "logits/chosen": 0.8481404185295105, "logits/rejected": 0.759494423866272, "logps/chosen": -1.1354610919952393, "logps/rejected": -4.5681233406066895, "loss": 0.4833, "nll_loss": 0.4700918197631836, "rewards/accuracies": 1.0, "rewards/chosen": -0.11354611814022064, "rewards/margins": 0.34326621890068054, "rewards/rejected": -0.45681232213974, "step": 7064 }, { "epoch": 19.342915811088297, "grad_norm": 4.5130462646484375, "learning_rate": 3.219178082191781e-08, "log_odds_chosen": 3.602186679840088, "log_odds_ratio": -0.30046749114990234, "logits/chosen": 1.2614190578460693, "logits/rejected": 1.2213153839111328, "logps/chosen": -1.9428468942642212, "logps/rejected": -5.364480018615723, "loss": 0.5199, "nll_loss": 0.4898608922958374, "rewards/accuracies": 0.75, "rewards/chosen": -0.19428467750549316, "rewards/margins": 0.3421632945537567, "rewards/rejected": -0.5364479422569275, "step": 7065 }, { "epoch": 19.345653661875428, "grad_norm": 3.662766695022583, "learning_rate": 3.205479452054794e-08, "log_odds_chosen": 5.388824462890625, "log_odds_ratio": -0.07969330996274948, "logits/chosen": 1.0560290813446045, "logits/rejected": 0.9819048643112183, "logps/chosen": -1.4447569847106934, "logps/rejected": -6.296226978302002, "loss": 0.4733, "nll_loss": 0.465302437543869, "rewards/accuracies": 1.0, "rewards/chosen": -0.14447572827339172, "rewards/margins": 0.4851469397544861, "rewards/rejected": -0.6296226978302002, "step": 7066 }, { "epoch": 19.34839151266256, "grad_norm": 3.5208358764648438, "learning_rate": 3.191780821917808e-08, "log_odds_chosen": 2.1377687454223633, "log_odds_ratio": -0.1826341301202774, "logits/chosen": 1.4356440305709839, "logits/rejected": 1.4374319314956665, "logps/chosen": -1.5452609062194824, "logps/rejected": -3.3964343070983887, "loss": 0.4585, "nll_loss": 0.4402048885822296, "rewards/accuracies": 1.0, "rewards/chosen": -0.15452608466148376, "rewards/margins": 0.18511734902858734, "rewards/rejected": -0.3396434485912323, "step": 7067 }, { "epoch": 19.351129363449694, "grad_norm": 4.09476375579834, "learning_rate": 3.178082191780822e-08, "log_odds_chosen": 4.742949485778809, "log_odds_ratio": -0.19592037796974182, "logits/chosen": 1.3199741840362549, "logits/rejected": 1.3554062843322754, "logps/chosen": -2.37762451171875, "logps/rejected": -6.9713335037231445, "loss": 0.5789, "nll_loss": 0.5593528151512146, "rewards/accuracies": 1.0, "rewards/chosen": -0.2377624660730362, "rewards/margins": 0.459370881319046, "rewards/rejected": -0.6971333622932434, "step": 7068 }, { "epoch": 19.353867214236825, "grad_norm": 4.49423885345459, "learning_rate": 3.164383561643835e-08, "log_odds_chosen": 4.213033676147461, "log_odds_ratio": -0.18745821714401245, "logits/chosen": 1.144188404083252, "logits/rejected": 1.2047767639160156, "logps/chosen": -2.4475321769714355, "logps/rejected": -6.5119547843933105, "loss": 0.6973, "nll_loss": 0.6785105466842651, "rewards/accuracies": 0.875, "rewards/chosen": -0.24475322663784027, "rewards/margins": 0.40644222497940063, "rewards/rejected": -0.6511954665184021, "step": 7069 }, { "epoch": 19.356605065023956, "grad_norm": 4.148107528686523, "learning_rate": 3.1506849315068497e-08, "log_odds_chosen": 2.85843563079834, "log_odds_ratio": -0.08935169875621796, "logits/chosen": 1.0666838884353638, "logits/rejected": 1.0423507690429688, "logps/chosen": -1.8603413105010986, "logps/rejected": -4.545801162719727, "loss": 0.48, "nll_loss": 0.4710400402545929, "rewards/accuracies": 1.0, "rewards/chosen": -0.18603414297103882, "rewards/margins": 0.2685459852218628, "rewards/rejected": -0.4545801281929016, "step": 7070 }, { "epoch": 19.359342915811087, "grad_norm": 3.6028192043304443, "learning_rate": 3.136986301369863e-08, "log_odds_chosen": 5.344681262969971, "log_odds_ratio": -0.1650945246219635, "logits/chosen": 1.196283221244812, "logits/rejected": 1.1945520639419556, "logps/chosen": -1.7986395359039307, "logps/rejected": -7.000152111053467, "loss": 0.6068, "nll_loss": 0.5902997255325317, "rewards/accuracies": 1.0, "rewards/chosen": -0.17986395955085754, "rewards/margins": 0.5201512575149536, "rewards/rejected": -0.7000152468681335, "step": 7071 }, { "epoch": 19.362080766598222, "grad_norm": 3.9257752895355225, "learning_rate": 3.123287671232877e-08, "log_odds_chosen": 1.9189584255218506, "log_odds_ratio": -0.2164355218410492, "logits/chosen": 1.2325118780136108, "logits/rejected": 1.2491135597229004, "logps/chosen": -2.159219741821289, "logps/rejected": -3.8957486152648926, "loss": 0.4568, "nll_loss": 0.4351791739463806, "rewards/accuracies": 1.0, "rewards/chosen": -0.21592199802398682, "rewards/margins": 0.17365288734436035, "rewards/rejected": -0.38957488536834717, "step": 7072 }, { "epoch": 19.364818617385353, "grad_norm": 3.8581485748291016, "learning_rate": 3.10958904109589e-08, "log_odds_chosen": 5.477210998535156, "log_odds_ratio": -0.06547500193119049, "logits/chosen": 1.0468369722366333, "logits/rejected": 0.9346164464950562, "logps/chosen": -2.249789237976074, "logps/rejected": -7.497786045074463, "loss": 0.4682, "nll_loss": 0.46162149310112, "rewards/accuracies": 1.0, "rewards/chosen": -0.22497893869876862, "rewards/margins": 0.5247997045516968, "rewards/rejected": -0.7497786283493042, "step": 7073 }, { "epoch": 19.367556468172484, "grad_norm": 4.308813571929932, "learning_rate": 3.095890410958904e-08, "log_odds_chosen": 3.7070376873016357, "log_odds_ratio": -0.23264990746974945, "logits/chosen": 1.3247573375701904, "logits/rejected": 1.3539055585861206, "logps/chosen": -1.929337739944458, "logps/rejected": -5.493494510650635, "loss": 0.5592, "nll_loss": 0.5359752178192139, "rewards/accuracies": 0.875, "rewards/chosen": -0.19293376803398132, "rewards/margins": 0.35641562938690186, "rewards/rejected": -0.5493494272232056, "step": 7074 }, { "epoch": 19.370294318959616, "grad_norm": 3.629678964614868, "learning_rate": 3.082191780821918e-08, "log_odds_chosen": 3.961399555206299, "log_odds_ratio": -0.10349015146493912, "logits/chosen": 1.135195016860962, "logits/rejected": 1.1878652572631836, "logps/chosen": -1.8794169425964355, "logps/rejected": -5.608391284942627, "loss": 0.5347, "nll_loss": 0.5243337154388428, "rewards/accuracies": 1.0, "rewards/chosen": -0.18794170022010803, "rewards/margins": 0.3728974461555481, "rewards/rejected": -0.5608391165733337, "step": 7075 }, { "epoch": 19.37303216974675, "grad_norm": 3.563791275024414, "learning_rate": 3.0684931506849316e-08, "log_odds_chosen": 3.5127713680267334, "log_odds_ratio": -0.16518262028694153, "logits/chosen": 1.2281817197799683, "logits/rejected": 1.2170823812484741, "logps/chosen": -2.0832114219665527, "logps/rejected": -5.443475246429443, "loss": 0.535, "nll_loss": 0.5185004472732544, "rewards/accuracies": 1.0, "rewards/chosen": -0.20832112431526184, "rewards/margins": 0.3360263407230377, "rewards/rejected": -0.5443475246429443, "step": 7076 }, { "epoch": 19.37577002053388, "grad_norm": 4.587374210357666, "learning_rate": 3.054794520547945e-08, "log_odds_chosen": 2.7952256202697754, "log_odds_ratio": -0.1584051251411438, "logits/chosen": 1.1960358619689941, "logits/rejected": 1.2631773948669434, "logps/chosen": -1.7616400718688965, "logps/rejected": -4.286210060119629, "loss": 0.4933, "nll_loss": 0.47746992111206055, "rewards/accuracies": 0.875, "rewards/chosen": -0.17616401612758636, "rewards/margins": 0.25245699286460876, "rewards/rejected": -0.42862099409103394, "step": 7077 }, { "epoch": 19.378507871321013, "grad_norm": 7.276895523071289, "learning_rate": 3.041095890410959e-08, "log_odds_chosen": 1.2590411901474, "log_odds_ratio": -0.5109020471572876, "logits/chosen": 1.1149920225143433, "logits/rejected": 1.1422698497772217, "logps/chosen": -2.2649760246276855, "logps/rejected": -3.493708372116089, "loss": 0.6727, "nll_loss": 0.621633768081665, "rewards/accuracies": 0.625, "rewards/chosen": -0.2264975905418396, "rewards/margins": 0.12287324666976929, "rewards/rejected": -0.3493708372116089, "step": 7078 }, { "epoch": 19.381245722108144, "grad_norm": 3.6097230911254883, "learning_rate": 3.0273972602739726e-08, "log_odds_chosen": 1.7492070198059082, "log_odds_ratio": -0.40789541602134705, "logits/chosen": 0.8978064656257629, "logits/rejected": 0.9390630125999451, "logps/chosen": -2.204951524734497, "logps/rejected": -3.849565267562866, "loss": 0.5213, "nll_loss": 0.48048362135887146, "rewards/accuracies": 0.875, "rewards/chosen": -0.22049516439437866, "rewards/margins": 0.16446135938167572, "rewards/rejected": -0.3849565386772156, "step": 7079 }, { "epoch": 19.38398357289528, "grad_norm": 3.6622984409332275, "learning_rate": 3.0136986301369865e-08, "log_odds_chosen": 2.1536202430725098, "log_odds_ratio": -0.2717409133911133, "logits/chosen": 1.1923731565475464, "logits/rejected": 1.1976964473724365, "logps/chosen": -1.6342272758483887, "logps/rejected": -3.62852144241333, "loss": 0.4646, "nll_loss": 0.43742382526397705, "rewards/accuracies": 0.875, "rewards/chosen": -0.16342273354530334, "rewards/margins": 0.19942940771579742, "rewards/rejected": -0.36285215616226196, "step": 7080 }, { "epoch": 19.38672142368241, "grad_norm": 7.268893241882324, "learning_rate": 3e-08, "log_odds_chosen": 3.3145174980163574, "log_odds_ratio": -0.17168016731739044, "logits/chosen": 1.235181450843811, "logits/rejected": 1.284544587135315, "logps/chosen": -1.8929908275604248, "logps/rejected": -5.028759002685547, "loss": 0.6136, "nll_loss": 0.5964657664299011, "rewards/accuracies": 1.0, "rewards/chosen": -0.1892990916967392, "rewards/margins": 0.3135768175125122, "rewards/rejected": -0.5028759241104126, "step": 7081 }, { "epoch": 19.38945927446954, "grad_norm": 4.2167582511901855, "learning_rate": 2.9863013698630136e-08, "log_odds_chosen": 2.279334545135498, "log_odds_ratio": -0.2313304841518402, "logits/chosen": 1.0649644136428833, "logits/rejected": 1.1019628047943115, "logps/chosen": -2.19926118850708, "logps/rejected": -4.351077079772949, "loss": 0.5882, "nll_loss": 0.5650315284729004, "rewards/accuracies": 1.0, "rewards/chosen": -0.2199261486530304, "rewards/margins": 0.2151816040277481, "rewards/rejected": -0.4351077675819397, "step": 7082 }, { "epoch": 19.392197125256672, "grad_norm": 3.9978041648864746, "learning_rate": 2.9726027397260275e-08, "log_odds_chosen": 3.8510701656341553, "log_odds_ratio": -0.11847847700119019, "logits/chosen": 0.9711487293243408, "logits/rejected": 0.9858163595199585, "logps/chosen": -2.3562378883361816, "logps/rejected": -6.079237937927246, "loss": 0.6342, "nll_loss": 0.6223089694976807, "rewards/accuracies": 1.0, "rewards/chosen": -0.2356237769126892, "rewards/margins": 0.37230002880096436, "rewards/rejected": -0.6079238653182983, "step": 7083 }, { "epoch": 19.394934976043807, "grad_norm": 3.469857692718506, "learning_rate": 2.9589041095890407e-08, "log_odds_chosen": 4.095989227294922, "log_odds_ratio": -0.04831483215093613, "logits/chosen": 1.1291269063949585, "logits/rejected": 1.1020309925079346, "logps/chosen": -1.568056344985962, "logps/rejected": -5.390118598937988, "loss": 0.5261, "nll_loss": 0.5212602615356445, "rewards/accuracies": 1.0, "rewards/chosen": -0.1568056344985962, "rewards/margins": 0.3822062909603119, "rewards/rejected": -0.5390118956565857, "step": 7084 }, { "epoch": 19.397672826830938, "grad_norm": 3.320631980895996, "learning_rate": 2.9452054794520546e-08, "log_odds_chosen": 3.857983112335205, "log_odds_ratio": -0.04141202196478844, "logits/chosen": 0.8726350665092468, "logits/rejected": 0.735095739364624, "logps/chosen": -1.9729838371276855, "logps/rejected": -5.6483917236328125, "loss": 0.6526, "nll_loss": 0.6484859585762024, "rewards/accuracies": 1.0, "rewards/chosen": -0.19729837775230408, "rewards/margins": 0.36754077672958374, "rewards/rejected": -0.5648391842842102, "step": 7085 }, { "epoch": 19.40041067761807, "grad_norm": 3.6278982162475586, "learning_rate": 2.9315068493150685e-08, "log_odds_chosen": 3.356832981109619, "log_odds_ratio": -0.1882425844669342, "logits/chosen": 1.1218504905700684, "logits/rejected": 1.1602815389633179, "logps/chosen": -2.2134311199188232, "logps/rejected": -5.442089557647705, "loss": 0.51, "nll_loss": 0.49117010831832886, "rewards/accuracies": 1.0, "rewards/chosen": -0.22134310007095337, "rewards/margins": 0.32286587357521057, "rewards/rejected": -0.5442089438438416, "step": 7086 }, { "epoch": 19.4031485284052, "grad_norm": 5.077476501464844, "learning_rate": 2.917808219178082e-08, "log_odds_chosen": 2.0531928539276123, "log_odds_ratio": -0.21911759674549103, "logits/chosen": 1.1594640016555786, "logits/rejected": 1.2123268842697144, "logps/chosen": -2.2806406021118164, "logps/rejected": -4.206655502319336, "loss": 0.6037, "nll_loss": 0.5817423462867737, "rewards/accuracies": 1.0, "rewards/chosen": -0.22806404531002045, "rewards/margins": 0.19260147213935852, "rewards/rejected": -0.42066556215286255, "step": 7087 }, { "epoch": 19.405886379192335, "grad_norm": 3.183575391769409, "learning_rate": 2.904109589041096e-08, "log_odds_chosen": 3.2629809379577637, "log_odds_ratio": -0.176059752702713, "logits/chosen": 1.0611064434051514, "logits/rejected": 1.0793876647949219, "logps/chosen": -1.8900911808013916, "logps/rejected": -4.940338134765625, "loss": 0.5717, "nll_loss": 0.5540976524353027, "rewards/accuracies": 0.875, "rewards/chosen": -0.18900911509990692, "rewards/margins": 0.3050246834754944, "rewards/rejected": -0.4940337538719177, "step": 7088 }, { "epoch": 19.408624229979466, "grad_norm": 3.975099563598633, "learning_rate": 2.890410958904109e-08, "log_odds_chosen": 1.5816539525985718, "log_odds_ratio": -0.2640751004219055, "logits/chosen": 0.8071153163909912, "logits/rejected": 0.7415679693222046, "logps/chosen": -1.8868961334228516, "logps/rejected": -3.2690038681030273, "loss": 0.5559, "nll_loss": 0.5295209884643555, "rewards/accuracies": 1.0, "rewards/chosen": -0.18868961930274963, "rewards/margins": 0.1382107436656952, "rewards/rejected": -0.3269003629684448, "step": 7089 }, { "epoch": 19.411362080766597, "grad_norm": 5.798306941986084, "learning_rate": 2.876712328767123e-08, "log_odds_chosen": 2.13242244720459, "log_odds_ratio": -0.1820589303970337, "logits/chosen": 1.2774008512496948, "logits/rejected": 1.2499984502792358, "logps/chosen": -2.1740334033966064, "logps/rejected": -4.049714088439941, "loss": 0.5781, "nll_loss": 0.5599227547645569, "rewards/accuracies": 1.0, "rewards/chosen": -0.2174033671617508, "rewards/margins": 0.1875680536031723, "rewards/rejected": -0.4049714207649231, "step": 7090 }, { "epoch": 19.41409993155373, "grad_norm": 3.2525856494903564, "learning_rate": 2.863013698630137e-08, "log_odds_chosen": 2.333460807800293, "log_odds_ratio": -0.1468401998281479, "logits/chosen": 1.2450881004333496, "logits/rejected": 1.2399708032608032, "logps/chosen": -1.8696599006652832, "logps/rejected": -3.863941192626953, "loss": 0.4261, "nll_loss": 0.41138115525245667, "rewards/accuracies": 1.0, "rewards/chosen": -0.18696600198745728, "rewards/margins": 0.19942814111709595, "rewards/rejected": -0.3863941431045532, "step": 7091 }, { "epoch": 19.416837782340863, "grad_norm": 3.8552324771881104, "learning_rate": 2.8493150684931505e-08, "log_odds_chosen": 3.67533540725708, "log_odds_ratio": -0.12895770370960236, "logits/chosen": 0.8526484966278076, "logits/rejected": 0.8340052366256714, "logps/chosen": -1.5110738277435303, "logps/rejected": -4.935823917388916, "loss": 0.4239, "nll_loss": 0.4109814763069153, "rewards/accuracies": 1.0, "rewards/chosen": -0.15110738575458527, "rewards/margins": 0.3424749970436096, "rewards/rejected": -0.4935823976993561, "step": 7092 }, { "epoch": 19.419575633127995, "grad_norm": 6.404195785522461, "learning_rate": 2.8356164383561644e-08, "log_odds_chosen": 1.8238863945007324, "log_odds_ratio": -0.32849887013435364, "logits/chosen": 1.011165738105774, "logits/rejected": 0.9442836046218872, "logps/chosen": -2.1816511154174805, "logps/rejected": -3.909130573272705, "loss": 0.4922, "nll_loss": 0.45936450362205505, "rewards/accuracies": 0.875, "rewards/chosen": -0.21816512942314148, "rewards/margins": 0.17274793982505798, "rewards/rejected": -0.39091306924819946, "step": 7093 }, { "epoch": 19.422313483915126, "grad_norm": 3.199629306793213, "learning_rate": 2.8219178082191782e-08, "log_odds_chosen": 4.4754557609558105, "log_odds_ratio": -0.07860440760850906, "logits/chosen": 1.137302041053772, "logits/rejected": 1.1546071767807007, "logps/chosen": -1.652361512184143, "logps/rejected": -5.90133810043335, "loss": 0.5205, "nll_loss": 0.5126749277114868, "rewards/accuracies": 1.0, "rewards/chosen": -0.16523614525794983, "rewards/margins": 0.424897700548172, "rewards/rejected": -0.590133786201477, "step": 7094 }, { "epoch": 19.42505133470226, "grad_norm": 3.9417262077331543, "learning_rate": 2.8082191780821915e-08, "log_odds_chosen": 1.4523845911026, "log_odds_ratio": -0.33362439274787903, "logits/chosen": 1.0668447017669678, "logits/rejected": 1.110001564025879, "logps/chosen": -1.8255995512008667, "logps/rejected": -3.1653048992156982, "loss": 0.545, "nll_loss": 0.5116101503372192, "rewards/accuracies": 0.875, "rewards/chosen": -0.18255996704101562, "rewards/margins": 0.13397052884101868, "rewards/rejected": -0.3165304958820343, "step": 7095 }, { "epoch": 19.42778918548939, "grad_norm": 3.7997331619262695, "learning_rate": 2.7945205479452053e-08, "log_odds_chosen": 1.6846016645431519, "log_odds_ratio": -0.32889774441719055, "logits/chosen": 1.0013656616210938, "logits/rejected": 0.9093828201293945, "logps/chosen": -1.792414665222168, "logps/rejected": -3.393681526184082, "loss": 0.4642, "nll_loss": 0.43129774928092957, "rewards/accuracies": 0.75, "rewards/chosen": -0.17924147844314575, "rewards/margins": 0.1601266711950302, "rewards/rejected": -0.33936816453933716, "step": 7096 }, { "epoch": 19.430527036276523, "grad_norm": 3.811586856842041, "learning_rate": 2.7808219178082192e-08, "log_odds_chosen": 5.0560479164123535, "log_odds_ratio": -0.06148998439311981, "logits/chosen": 1.1802548170089722, "logits/rejected": 1.2714650630950928, "logps/chosen": -1.964258074760437, "logps/rejected": -6.6800689697265625, "loss": 0.5652, "nll_loss": 0.5590022802352905, "rewards/accuracies": 1.0, "rewards/chosen": -0.19642581045627594, "rewards/margins": 0.4715811312198639, "rewards/rejected": -0.6680068969726562, "step": 7097 }, { "epoch": 19.433264887063654, "grad_norm": 4.869388580322266, "learning_rate": 2.7671232876712328e-08, "log_odds_chosen": 4.8042073249816895, "log_odds_ratio": -0.04405377060174942, "logits/chosen": 1.4354164600372314, "logits/rejected": 1.425748586654663, "logps/chosen": -2.1132559776306152, "logps/rejected": -6.7696533203125, "loss": 0.6023, "nll_loss": 0.5979384779930115, "rewards/accuracies": 1.0, "rewards/chosen": -0.21132561564445496, "rewards/margins": 0.46563971042633057, "rewards/rejected": -0.6769652962684631, "step": 7098 }, { "epoch": 19.43600273785079, "grad_norm": 4.433584213256836, "learning_rate": 2.7534246575342467e-08, "log_odds_chosen": 2.6853737831115723, "log_odds_ratio": -0.19023439288139343, "logits/chosen": 1.477874755859375, "logits/rejected": 1.5133056640625, "logps/chosen": -2.0645194053649902, "logps/rejected": -4.529446125030518, "loss": 0.5006, "nll_loss": 0.48154401779174805, "rewards/accuracies": 1.0, "rewards/chosen": -0.20645196735858917, "rewards/margins": 0.2464926540851593, "rewards/rejected": -0.45294463634490967, "step": 7099 }, { "epoch": 19.43874058863792, "grad_norm": 3.5620033740997314, "learning_rate": 2.73972602739726e-08, "log_odds_chosen": 2.35983943939209, "log_odds_ratio": -0.15891742706298828, "logits/chosen": 1.1264938116073608, "logits/rejected": 1.1525753736495972, "logps/chosen": -2.003427505493164, "logps/rejected": -4.168857574462891, "loss": 0.4904, "nll_loss": 0.4745016098022461, "rewards/accuracies": 1.0, "rewards/chosen": -0.20034272968769073, "rewards/margins": 0.2165430337190628, "rewards/rejected": -0.41688576340675354, "step": 7100 }, { "epoch": 19.44147843942505, "grad_norm": 4.095451354980469, "learning_rate": 2.7260273972602738e-08, "log_odds_chosen": 3.099384307861328, "log_odds_ratio": -0.1634567677974701, "logits/chosen": 0.9289886951446533, "logits/rejected": 0.9028129577636719, "logps/chosen": -1.6097054481506348, "logps/rejected": -4.443910598754883, "loss": 0.4823, "nll_loss": 0.4659086763858795, "rewards/accuracies": 1.0, "rewards/chosen": -0.160970538854599, "rewards/margins": 0.2834205627441406, "rewards/rejected": -0.44439107179641724, "step": 7101 }, { "epoch": 19.444216290212182, "grad_norm": 3.6915745735168457, "learning_rate": 2.7123287671232877e-08, "log_odds_chosen": 2.607923984527588, "log_odds_ratio": -0.24135518074035645, "logits/chosen": 1.3038580417633057, "logits/rejected": 1.2268272638320923, "logps/chosen": -1.5381718873977661, "logps/rejected": -3.9641876220703125, "loss": 0.484, "nll_loss": 0.45982128381729126, "rewards/accuracies": 1.0, "rewards/chosen": -0.15381719172000885, "rewards/margins": 0.24260160326957703, "rewards/rejected": -0.3964187800884247, "step": 7102 }, { "epoch": 19.446954140999317, "grad_norm": 3.8667705059051514, "learning_rate": 2.6986301369863012e-08, "log_odds_chosen": 1.83228600025177, "log_odds_ratio": -0.1975024789571762, "logits/chosen": 0.9728769063949585, "logits/rejected": 0.9505127668380737, "logps/chosen": -1.7566485404968262, "logps/rejected": -3.403933048248291, "loss": 0.453, "nll_loss": 0.4332357347011566, "rewards/accuracies": 1.0, "rewards/chosen": -0.17566484212875366, "rewards/margins": 0.16472846269607544, "rewards/rejected": -0.3403933048248291, "step": 7103 }, { "epoch": 19.449691991786448, "grad_norm": 5.275068759918213, "learning_rate": 2.684931506849315e-08, "log_odds_chosen": 1.3030221462249756, "log_odds_ratio": -0.30653515458106995, "logits/chosen": 1.3650968074798584, "logits/rejected": 1.3816499710083008, "logps/chosen": -2.3639936447143555, "logps/rejected": -3.5430455207824707, "loss": 0.6682, "nll_loss": 0.6375919580459595, "rewards/accuracies": 1.0, "rewards/chosen": -0.23639938235282898, "rewards/margins": 0.1179051548242569, "rewards/rejected": -0.35430455207824707, "step": 7104 }, { "epoch": 19.45242984257358, "grad_norm": 3.604060173034668, "learning_rate": 2.6712328767123287e-08, "log_odds_chosen": 2.800161361694336, "log_odds_ratio": -0.18867379426956177, "logits/chosen": 1.049643635749817, "logits/rejected": 1.0129647254943848, "logps/chosen": -2.250964403152466, "logps/rejected": -4.945566177368164, "loss": 0.5703, "nll_loss": 0.5514655113220215, "rewards/accuracies": 1.0, "rewards/chosen": -0.2250964343547821, "rewards/margins": 0.26946020126342773, "rewards/rejected": -0.49455660581588745, "step": 7105 }, { "epoch": 19.45516769336071, "grad_norm": 3.4247446060180664, "learning_rate": 2.6575342465753422e-08, "log_odds_chosen": 4.1812944412231445, "log_odds_ratio": -0.19038593769073486, "logits/chosen": 1.1140129566192627, "logits/rejected": 1.1233272552490234, "logps/chosen": -1.4333692789077759, "logps/rejected": -5.277461051940918, "loss": 0.5946, "nll_loss": 0.575565755367279, "rewards/accuracies": 1.0, "rewards/chosen": -0.1433369219303131, "rewards/margins": 0.3844091594219208, "rewards/rejected": -0.5277460813522339, "step": 7106 }, { "epoch": 19.457905544147845, "grad_norm": 7.0751543045043945, "learning_rate": 2.643835616438356e-08, "log_odds_chosen": 1.1802878379821777, "log_odds_ratio": -0.353894978761673, "logits/chosen": 1.1846892833709717, "logits/rejected": 1.0808883905410767, "logps/chosen": -1.7072757482528687, "logps/rejected": -2.714973211288452, "loss": 0.4515, "nll_loss": 0.41610872745513916, "rewards/accuracies": 1.0, "rewards/chosen": -0.17072758078575134, "rewards/margins": 0.10076974332332611, "rewards/rejected": -0.27149733901023865, "step": 7107 }, { "epoch": 19.460643394934976, "grad_norm": 10.519612312316895, "learning_rate": 2.63013698630137e-08, "log_odds_chosen": 3.2429635524749756, "log_odds_ratio": -0.18150576949119568, "logits/chosen": 1.209530234336853, "logits/rejected": 1.1925363540649414, "logps/chosen": -1.7279632091522217, "logps/rejected": -4.681033134460449, "loss": 0.5394, "nll_loss": 0.5212490558624268, "rewards/accuracies": 0.875, "rewards/chosen": -0.1727963387966156, "rewards/margins": 0.2953069806098938, "rewards/rejected": -0.468103289604187, "step": 7108 }, { "epoch": 19.463381245722108, "grad_norm": 4.684683799743652, "learning_rate": 2.6164383561643835e-08, "log_odds_chosen": 0.38766908645629883, "log_odds_ratio": -0.5688611268997192, "logits/chosen": 0.9832049608230591, "logits/rejected": 0.984042227268219, "logps/chosen": -1.969602108001709, "logps/rejected": -2.269209861755371, "loss": 0.5788, "nll_loss": 0.5219405293464661, "rewards/accuracies": 0.875, "rewards/chosen": -0.1969602257013321, "rewards/margins": 0.029960796236991882, "rewards/rejected": -0.22692102193832397, "step": 7109 }, { "epoch": 19.46611909650924, "grad_norm": 4.629293918609619, "learning_rate": 2.602739726027397e-08, "log_odds_chosen": 3.947856903076172, "log_odds_ratio": -0.08405720442533493, "logits/chosen": 0.8697361946105957, "logits/rejected": 0.8333525061607361, "logps/chosen": -2.210968017578125, "logps/rejected": -5.994229793548584, "loss": 0.73, "nll_loss": 0.7215541005134583, "rewards/accuracies": 1.0, "rewards/chosen": -0.22109681367874146, "rewards/margins": 0.3783261775970459, "rewards/rejected": -0.5994229912757874, "step": 7110 }, { "epoch": 19.468856947296374, "grad_norm": 4.091686248779297, "learning_rate": 2.5890410958904106e-08, "log_odds_chosen": 2.398911952972412, "log_odds_ratio": -0.26568078994750977, "logits/chosen": 1.152602195739746, "logits/rejected": 1.0600093603134155, "logps/chosen": -1.7639200687408447, "logps/rejected": -4.013056755065918, "loss": 0.4613, "nll_loss": 0.43477970361709595, "rewards/accuracies": 0.875, "rewards/chosen": -0.17639201879501343, "rewards/margins": 0.22491368651390076, "rewards/rejected": -0.4013057351112366, "step": 7111 }, { "epoch": 19.471594798083505, "grad_norm": 3.4440317153930664, "learning_rate": 2.5753424657534245e-08, "log_odds_chosen": 4.910042762756348, "log_odds_ratio": -0.039813246577978134, "logits/chosen": 1.3243145942687988, "logits/rejected": 1.36501944065094, "logps/chosen": -1.543208360671997, "logps/rejected": -6.118577003479004, "loss": 0.5222, "nll_loss": 0.5182442665100098, "rewards/accuracies": 1.0, "rewards/chosen": -0.1543208360671997, "rewards/margins": 0.45753684639930725, "rewards/rejected": -0.6118577122688293, "step": 7112 }, { "epoch": 19.474332648870636, "grad_norm": 7.848231792449951, "learning_rate": 2.5616438356164384e-08, "log_odds_chosen": 2.2405858039855957, "log_odds_ratio": -0.528090238571167, "logits/chosen": 1.1325379610061646, "logits/rejected": 1.173112392425537, "logps/chosen": -2.5798377990722656, "logps/rejected": -4.740553855895996, "loss": 0.633, "nll_loss": 0.5802053809165955, "rewards/accuracies": 0.625, "rewards/chosen": -0.2579838037490845, "rewards/margins": 0.21607159078121185, "rewards/rejected": -0.47405537962913513, "step": 7113 }, { "epoch": 19.477070499657767, "grad_norm": 3.365201711654663, "learning_rate": 2.547945205479452e-08, "log_odds_chosen": 4.2852277755737305, "log_odds_ratio": -0.08332549035549164, "logits/chosen": 1.1250742673873901, "logits/rejected": 1.1232380867004395, "logps/chosen": -1.8936740159988403, "logps/rejected": -5.887111186981201, "loss": 0.5254, "nll_loss": 0.517055094242096, "rewards/accuracies": 1.0, "rewards/chosen": -0.1893673837184906, "rewards/margins": 0.39934372901916504, "rewards/rejected": -0.588711142539978, "step": 7114 }, { "epoch": 19.479808350444902, "grad_norm": 4.428563594818115, "learning_rate": 2.5342465753424655e-08, "log_odds_chosen": 2.864131212234497, "log_odds_ratio": -0.2352803498506546, "logits/chosen": 1.2572364807128906, "logits/rejected": 1.2344305515289307, "logps/chosen": -1.441393494606018, "logps/rejected": -4.113569736480713, "loss": 0.4462, "nll_loss": 0.4226953089237213, "rewards/accuracies": 1.0, "rewards/chosen": -0.1441393494606018, "rewards/margins": 0.26721763610839844, "rewards/rejected": -0.41135701537132263, "step": 7115 }, { "epoch": 19.482546201232033, "grad_norm": 3.2752132415771484, "learning_rate": 2.5205479452054794e-08, "log_odds_chosen": 3.891475200653076, "log_odds_ratio": -0.13975335657596588, "logits/chosen": 1.176056146621704, "logits/rejected": 1.2126169204711914, "logps/chosen": -1.4290425777435303, "logps/rejected": -5.0302605628967285, "loss": 0.5228, "nll_loss": 0.5088168382644653, "rewards/accuracies": 1.0, "rewards/chosen": -0.14290426671504974, "rewards/margins": 0.36012178659439087, "rewards/rejected": -0.503026008605957, "step": 7116 }, { "epoch": 19.485284052019164, "grad_norm": 3.2128841876983643, "learning_rate": 2.506849315068493e-08, "log_odds_chosen": 3.187216281890869, "log_odds_ratio": -0.09950686991214752, "logits/chosen": 1.28071928024292, "logits/rejected": 1.2439227104187012, "logps/chosen": -1.5888534784317017, "logps/rejected": -4.507638931274414, "loss": 0.4845, "nll_loss": 0.4745197296142578, "rewards/accuracies": 1.0, "rewards/chosen": -0.15888534486293793, "rewards/margins": 0.2918785810470581, "rewards/rejected": -0.4507639408111572, "step": 7117 }, { "epoch": 19.488021902806295, "grad_norm": 3.841778039932251, "learning_rate": 2.4931506849315068e-08, "log_odds_chosen": 2.843428611755371, "log_odds_ratio": -0.18163740634918213, "logits/chosen": 1.0828920602798462, "logits/rejected": 1.140446424484253, "logps/chosen": -2.360156297683716, "logps/rejected": -5.045234680175781, "loss": 0.5393, "nll_loss": 0.5211619734764099, "rewards/accuracies": 1.0, "rewards/chosen": -0.23601561784744263, "rewards/margins": 0.26850786805152893, "rewards/rejected": -0.5045234560966492, "step": 7118 }, { "epoch": 19.49075975359343, "grad_norm": 3.713371992111206, "learning_rate": 2.4794520547945207e-08, "log_odds_chosen": 3.6693460941314697, "log_odds_ratio": -0.1235823705792427, "logits/chosen": 1.0021296739578247, "logits/rejected": 0.9495376944541931, "logps/chosen": -1.2892967462539673, "logps/rejected": -4.503527641296387, "loss": 0.5667, "nll_loss": 0.5543697476387024, "rewards/accuracies": 1.0, "rewards/chosen": -0.12892967462539673, "rewards/margins": 0.32142308354377747, "rewards/rejected": -0.4503527581691742, "step": 7119 }, { "epoch": 19.49349760438056, "grad_norm": 3.666839838027954, "learning_rate": 2.465753424657534e-08, "log_odds_chosen": 4.667713642120361, "log_odds_ratio": -0.15776626765727997, "logits/chosen": 1.0108668804168701, "logits/rejected": 1.0243616104125977, "logps/chosen": -2.2553415298461914, "logps/rejected": -6.749945640563965, "loss": 0.5946, "nll_loss": 0.5788310766220093, "rewards/accuracies": 1.0, "rewards/chosen": -0.22553414106369019, "rewards/margins": 0.4494604468345642, "rewards/rejected": -0.6749945878982544, "step": 7120 }, { "epoch": 19.496235455167692, "grad_norm": 3.706088066101074, "learning_rate": 2.4520547945205478e-08, "log_odds_chosen": 2.9454572200775146, "log_odds_ratio": -0.12545369565486908, "logits/chosen": 0.9827616214752197, "logits/rejected": 0.9177373647689819, "logps/chosen": -1.8246866464614868, "logps/rejected": -4.533835411071777, "loss": 0.5892, "nll_loss": 0.5766245126724243, "rewards/accuracies": 1.0, "rewards/chosen": -0.18246868252754211, "rewards/margins": 0.27091485261917114, "rewards/rejected": -0.45338353514671326, "step": 7121 }, { "epoch": 19.498973305954827, "grad_norm": 10.23214340209961, "learning_rate": 2.4383561643835614e-08, "log_odds_chosen": 1.8365474939346313, "log_odds_ratio": -0.24940085411071777, "logits/chosen": 1.341569185256958, "logits/rejected": 1.3168524503707886, "logps/chosen": -2.2133777141571045, "logps/rejected": -3.936100959777832, "loss": 0.5205, "nll_loss": 0.49554961919784546, "rewards/accuracies": 1.0, "rewards/chosen": -0.22133776545524597, "rewards/margins": 0.17227230966091156, "rewards/rejected": -0.39361006021499634, "step": 7122 }, { "epoch": 19.50171115674196, "grad_norm": 3.832307815551758, "learning_rate": 2.4246575342465753e-08, "log_odds_chosen": 2.0762410163879395, "log_odds_ratio": -0.2357248067855835, "logits/chosen": 1.2428404092788696, "logits/rejected": 1.2546038627624512, "logps/chosen": -1.6478732824325562, "logps/rejected": -3.5643866062164307, "loss": 0.4661, "nll_loss": 0.44252312183380127, "rewards/accuracies": 0.875, "rewards/chosen": -0.16478733718395233, "rewards/margins": 0.1916513293981552, "rewards/rejected": -0.35643869638442993, "step": 7123 }, { "epoch": 19.50444900752909, "grad_norm": 5.562934398651123, "learning_rate": 2.410958904109589e-08, "log_odds_chosen": 4.515396595001221, "log_odds_ratio": -0.15167778730392456, "logits/chosen": 1.217240333557129, "logits/rejected": 1.2388802766799927, "logps/chosen": -2.4448814392089844, "logps/rejected": -6.830141067504883, "loss": 0.5725, "nll_loss": 0.5573611855506897, "rewards/accuracies": 1.0, "rewards/chosen": -0.24448814988136292, "rewards/margins": 0.4385260045528412, "rewards/rejected": -0.6830141544342041, "step": 7124 }, { "epoch": 19.50718685831622, "grad_norm": 3.7358438968658447, "learning_rate": 2.3972602739726024e-08, "log_odds_chosen": 1.2997626066207886, "log_odds_ratio": -0.3284899592399597, "logits/chosen": 1.351472020149231, "logits/rejected": 1.304081916809082, "logps/chosen": -1.5874691009521484, "logps/rejected": -2.738870143890381, "loss": 0.4638, "nll_loss": 0.43091240525245667, "rewards/accuracies": 1.0, "rewards/chosen": -0.15874692797660828, "rewards/margins": 0.11514009535312653, "rewards/rejected": -0.2738870084285736, "step": 7125 }, { "epoch": 19.509924709103355, "grad_norm": 3.7147090435028076, "learning_rate": 2.3835616438356162e-08, "log_odds_chosen": 4.947254180908203, "log_odds_ratio": -0.043709415942430496, "logits/chosen": 1.156661033630371, "logits/rejected": 1.1727652549743652, "logps/chosen": -2.2174739837646484, "logps/rejected": -7.006131649017334, "loss": 0.6159, "nll_loss": 0.6115380525588989, "rewards/accuracies": 1.0, "rewards/chosen": -0.22174739837646484, "rewards/margins": 0.4788658022880554, "rewards/rejected": -0.7006131410598755, "step": 7126 }, { "epoch": 19.512662559890487, "grad_norm": 3.7060022354125977, "learning_rate": 2.36986301369863e-08, "log_odds_chosen": 3.129422664642334, "log_odds_ratio": -0.1966155767440796, "logits/chosen": 0.8778536319732666, "logits/rejected": 0.8323521018028259, "logps/chosen": -1.9286710023880005, "logps/rejected": -4.89554500579834, "loss": 0.4694, "nll_loss": 0.4497545063495636, "rewards/accuracies": 0.875, "rewards/chosen": -0.19286710023880005, "rewards/margins": 0.29668742418289185, "rewards/rejected": -0.4895545244216919, "step": 7127 }, { "epoch": 19.515400410677618, "grad_norm": 3.6795129776000977, "learning_rate": 2.3561643835616437e-08, "log_odds_chosen": 2.847564458847046, "log_odds_ratio": -0.09575317800045013, "logits/chosen": 1.17264986038208, "logits/rejected": 1.2511783838272095, "logps/chosen": -1.9975500106811523, "logps/rejected": -4.65621280670166, "loss": 0.5873, "nll_loss": 0.5776992440223694, "rewards/accuracies": 1.0, "rewards/chosen": -0.199754998087883, "rewards/margins": 0.2658662796020508, "rewards/rejected": -0.46562129259109497, "step": 7128 }, { "epoch": 19.51813826146475, "grad_norm": 6.973484039306641, "learning_rate": 2.3424657534246576e-08, "log_odds_chosen": 4.716814994812012, "log_odds_ratio": -0.35022780299186707, "logits/chosen": 1.6321007013320923, "logits/rejected": 1.643995761871338, "logps/chosen": -2.476745843887329, "logps/rejected": -7.098811149597168, "loss": 0.5341, "nll_loss": 0.4991012215614319, "rewards/accuracies": 0.875, "rewards/chosen": -0.2476746141910553, "rewards/margins": 0.46220654249191284, "rewards/rejected": -0.7098811864852905, "step": 7129 }, { "epoch": 19.520876112251884, "grad_norm": 3.834709405899048, "learning_rate": 2.328767123287671e-08, "log_odds_chosen": 2.325528621673584, "log_odds_ratio": -0.22514843940734863, "logits/chosen": 1.1775099039077759, "logits/rejected": 1.115488052368164, "logps/chosen": -1.5718965530395508, "logps/rejected": -3.739274501800537, "loss": 0.5369, "nll_loss": 0.51438969373703, "rewards/accuracies": 1.0, "rewards/chosen": -0.15718966722488403, "rewards/margins": 0.2167377769947052, "rewards/rejected": -0.37392744421958923, "step": 7130 }, { "epoch": 19.523613963039015, "grad_norm": 3.661313533782959, "learning_rate": 2.3150684931506847e-08, "log_odds_chosen": 3.5273733139038086, "log_odds_ratio": -0.1502578854560852, "logits/chosen": 1.0525113344192505, "logits/rejected": 1.0442783832550049, "logps/chosen": -3.127593755722046, "logps/rejected": -6.561874866485596, "loss": 0.6162, "nll_loss": 0.601193904876709, "rewards/accuracies": 1.0, "rewards/chosen": -0.3127593994140625, "rewards/margins": 0.3434280753135681, "rewards/rejected": -0.6561874747276306, "step": 7131 }, { "epoch": 19.526351813826146, "grad_norm": 3.53178071975708, "learning_rate": 2.3013698630136986e-08, "log_odds_chosen": 3.1017489433288574, "log_odds_ratio": -0.22171403467655182, "logits/chosen": 0.9517450928688049, "logits/rejected": 0.94716477394104, "logps/chosen": -1.9721412658691406, "logps/rejected": -4.940823554992676, "loss": 0.5363, "nll_loss": 0.5141206979751587, "rewards/accuracies": 1.0, "rewards/chosen": -0.19721412658691406, "rewards/margins": 0.2968682050704956, "rewards/rejected": -0.49408236145973206, "step": 7132 }, { "epoch": 19.529089664613277, "grad_norm": 4.12699031829834, "learning_rate": 2.287671232876712e-08, "log_odds_chosen": 4.238862037658691, "log_odds_ratio": -0.056832052767276764, "logits/chosen": 1.0043344497680664, "logits/rejected": 1.0413099527359009, "logps/chosen": -1.6318483352661133, "logps/rejected": -5.492180347442627, "loss": 0.7915, "nll_loss": 0.7858336567878723, "rewards/accuracies": 1.0, "rewards/chosen": -0.16318483650684357, "rewards/margins": 0.3860332667827606, "rewards/rejected": -0.5492180585861206, "step": 7133 }, { "epoch": 19.531827515400412, "grad_norm": 5.139143943786621, "learning_rate": 2.273972602739726e-08, "log_odds_chosen": 4.661551475524902, "log_odds_ratio": -0.1972407102584839, "logits/chosen": 1.3465794324874878, "logits/rejected": 1.3927936553955078, "logps/chosen": -2.696958541870117, "logps/rejected": -7.2811737060546875, "loss": 0.651, "nll_loss": 0.6312283277511597, "rewards/accuracies": 1.0, "rewards/chosen": -0.26969587802886963, "rewards/margins": 0.4584214985370636, "rewards/rejected": -0.7281173467636108, "step": 7134 }, { "epoch": 19.534565366187543, "grad_norm": 6.218548774719238, "learning_rate": 2.2602739726027396e-08, "log_odds_chosen": 1.900759220123291, "log_odds_ratio": -0.22924350202083588, "logits/chosen": 1.3184210062026978, "logits/rejected": 1.2446882724761963, "logps/chosen": -2.203671932220459, "logps/rejected": -3.958655834197998, "loss": 0.5997, "nll_loss": 0.576741099357605, "rewards/accuracies": 1.0, "rewards/chosen": -0.2203672081232071, "rewards/margins": 0.17549841105937958, "rewards/rejected": -0.39586561918258667, "step": 7135 }, { "epoch": 19.537303216974674, "grad_norm": 4.496918201446533, "learning_rate": 2.246575342465753e-08, "log_odds_chosen": 2.832336187362671, "log_odds_ratio": -0.17299389839172363, "logits/chosen": 1.2012486457824707, "logits/rejected": 1.2315850257873535, "logps/chosen": -1.9906909465789795, "logps/rejected": -4.562278747558594, "loss": 0.4644, "nll_loss": 0.4470626711845398, "rewards/accuracies": 0.875, "rewards/chosen": -0.1990690976381302, "rewards/margins": 0.2571587562561035, "rewards/rejected": -0.4562278687953949, "step": 7136 }, { "epoch": 19.540041067761805, "grad_norm": 6.3011250495910645, "learning_rate": 2.232876712328767e-08, "log_odds_chosen": 1.920328140258789, "log_odds_ratio": -0.35702869296073914, "logits/chosen": 0.9235152006149292, "logits/rejected": 0.9476617574691772, "logps/chosen": -2.594113349914551, "logps/rejected": -4.425015926361084, "loss": 0.5952, "nll_loss": 0.5595178604125977, "rewards/accuracies": 0.875, "rewards/chosen": -0.2594113349914551, "rewards/margins": 0.18309026956558228, "rewards/rejected": -0.44250160455703735, "step": 7137 }, { "epoch": 19.54277891854894, "grad_norm": 3.4031059741973877, "learning_rate": 2.219178082191781e-08, "log_odds_chosen": 3.4306986331939697, "log_odds_ratio": -0.13759037852287292, "logits/chosen": 1.2026393413543701, "logits/rejected": 1.2210216522216797, "logps/chosen": -1.43711519241333, "logps/rejected": -4.4952592849731445, "loss": 0.4621, "nll_loss": 0.44836848974227905, "rewards/accuracies": 1.0, "rewards/chosen": -0.14371152222156525, "rewards/margins": 0.30581438541412354, "rewards/rejected": -0.4495258927345276, "step": 7138 }, { "epoch": 19.54551676933607, "grad_norm": 5.8290252685546875, "learning_rate": 2.2054794520547944e-08, "log_odds_chosen": 3.678708553314209, "log_odds_ratio": -0.4170479476451874, "logits/chosen": 1.1708861589431763, "logits/rejected": 1.2576321363449097, "logps/chosen": -2.447376251220703, "logps/rejected": -5.930212497711182, "loss": 0.6599, "nll_loss": 0.6181797385215759, "rewards/accuracies": 0.875, "rewards/chosen": -0.2447376400232315, "rewards/margins": 0.3482836186885834, "rewards/rejected": -0.5930212736129761, "step": 7139 }, { "epoch": 19.548254620123203, "grad_norm": 4.187984466552734, "learning_rate": 2.191780821917808e-08, "log_odds_chosen": 2.3735008239746094, "log_odds_ratio": -0.18671418726444244, "logits/chosen": 1.2729442119598389, "logits/rejected": 1.220805048942566, "logps/chosen": -1.5810911655426025, "logps/rejected": -3.707456111907959, "loss": 0.4628, "nll_loss": 0.4441552460193634, "rewards/accuracies": 1.0, "rewards/chosen": -0.15810911357402802, "rewards/margins": 0.21263651549816132, "rewards/rejected": -0.3707456588745117, "step": 7140 }, { "epoch": 19.550992470910334, "grad_norm": 5.275199890136719, "learning_rate": 2.178082191780822e-08, "log_odds_chosen": 0.9403228759765625, "log_odds_ratio": -0.5175081491470337, "logits/chosen": 1.2094329595565796, "logits/rejected": 1.233187198638916, "logps/chosen": -2.28576922416687, "logps/rejected": -3.0520923137664795, "loss": 0.6705, "nll_loss": 0.618768572807312, "rewards/accuracies": 0.875, "rewards/chosen": -0.2285769283771515, "rewards/margins": 0.07663232833147049, "rewards/rejected": -0.3052092492580414, "step": 7141 }, { "epoch": 19.55373032169747, "grad_norm": 3.720306873321533, "learning_rate": 2.1643835616438354e-08, "log_odds_chosen": 3.1826694011688232, "log_odds_ratio": -0.1780279278755188, "logits/chosen": 0.9609779119491577, "logits/rejected": 0.9133910536766052, "logps/chosen": -1.5747625827789307, "logps/rejected": -4.474557876586914, "loss": 0.5615, "nll_loss": 0.5436870455741882, "rewards/accuracies": 1.0, "rewards/chosen": -0.1574762463569641, "rewards/margins": 0.28997957706451416, "rewards/rejected": -0.44745582342147827, "step": 7142 }, { "epoch": 19.5564681724846, "grad_norm": 4.3565874099731445, "learning_rate": 2.1506849315068493e-08, "log_odds_chosen": 1.84821617603302, "log_odds_ratio": -0.19344225525856018, "logits/chosen": 1.1213328838348389, "logits/rejected": 1.1385765075683594, "logps/chosen": -1.7158360481262207, "logps/rejected": -3.355708122253418, "loss": 0.4562, "nll_loss": 0.43684667348861694, "rewards/accuracies": 1.0, "rewards/chosen": -0.1715836226940155, "rewards/margins": 0.1639872044324875, "rewards/rejected": -0.3355708420276642, "step": 7143 }, { "epoch": 19.55920602327173, "grad_norm": 3.804231882095337, "learning_rate": 2.1369863013698632e-08, "log_odds_chosen": 3.397242546081543, "log_odds_ratio": -0.06222802400588989, "logits/chosen": 1.2457866668701172, "logits/rejected": 1.3198673725128174, "logps/chosen": -1.8551063537597656, "logps/rejected": -5.002796173095703, "loss": 0.5422, "nll_loss": 0.5359506607055664, "rewards/accuracies": 1.0, "rewards/chosen": -0.18551063537597656, "rewards/margins": 0.31476902961730957, "rewards/rejected": -0.5002796649932861, "step": 7144 }, { "epoch": 19.561943874058862, "grad_norm": 5.165901184082031, "learning_rate": 2.1232876712328764e-08, "log_odds_chosen": 2.880262613296509, "log_odds_ratio": -0.5234413146972656, "logits/chosen": 0.9661571979522705, "logits/rejected": 1.0867838859558105, "logps/chosen": -2.7920963764190674, "logps/rejected": -5.574519157409668, "loss": 0.6682, "nll_loss": 0.6158409118652344, "rewards/accuracies": 0.875, "rewards/chosen": -0.2792096436023712, "rewards/margins": 0.2782422602176666, "rewards/rejected": -0.5574519038200378, "step": 7145 }, { "epoch": 19.564681724845997, "grad_norm": 14.967414855957031, "learning_rate": 2.1095890410958903e-08, "log_odds_chosen": 2.796146869659424, "log_odds_ratio": -0.3056267201900482, "logits/chosen": 1.2015471458435059, "logits/rejected": 1.1942565441131592, "logps/chosen": -2.8683383464813232, "logps/rejected": -5.5351104736328125, "loss": 0.6888, "nll_loss": 0.6582069396972656, "rewards/accuracies": 0.875, "rewards/chosen": -0.28683382272720337, "rewards/margins": 0.26667726039886475, "rewards/rejected": -0.5535110831260681, "step": 7146 }, { "epoch": 19.567419575633128, "grad_norm": 4.514114856719971, "learning_rate": 2.095890410958904e-08, "log_odds_chosen": 1.898597240447998, "log_odds_ratio": -0.145828977227211, "logits/chosen": 0.9297424554824829, "logits/rejected": 0.8456473350524902, "logps/chosen": -1.5442250967025757, "logps/rejected": -3.1758875846862793, "loss": 0.4008, "nll_loss": 0.3862444758415222, "rewards/accuracies": 1.0, "rewards/chosen": -0.15442250669002533, "rewards/margins": 0.16316625475883484, "rewards/rejected": -0.31758877635002136, "step": 7147 }, { "epoch": 19.57015742642026, "grad_norm": 3.90427827835083, "learning_rate": 2.0821917808219177e-08, "log_odds_chosen": 4.636950969696045, "log_odds_ratio": -0.26484397053718567, "logits/chosen": 0.8635699152946472, "logits/rejected": 0.8820606470108032, "logps/chosen": -1.5725982189178467, "logps/rejected": -6.038958549499512, "loss": 0.6175, "nll_loss": 0.5910192728042603, "rewards/accuracies": 0.875, "rewards/chosen": -0.15725982189178467, "rewards/margins": 0.44663602113723755, "rewards/rejected": -0.6038958430290222, "step": 7148 }, { "epoch": 19.572895277207394, "grad_norm": 6.312036991119385, "learning_rate": 2.0684931506849316e-08, "log_odds_chosen": 2.293462038040161, "log_odds_ratio": -0.24382871389389038, "logits/chosen": 1.0951319932937622, "logits/rejected": 1.0458341836929321, "logps/chosen": -1.982808232307434, "logps/rejected": -4.062978744506836, "loss": 0.5363, "nll_loss": 0.5118875503540039, "rewards/accuracies": 1.0, "rewards/chosen": -0.19828081130981445, "rewards/margins": 0.20801708102226257, "rewards/rejected": -0.4062979221343994, "step": 7149 }, { "epoch": 19.575633127994525, "grad_norm": 4.418295383453369, "learning_rate": 2.054794520547945e-08, "log_odds_chosen": 5.02849006652832, "log_odds_ratio": -0.09888322651386261, "logits/chosen": 1.0643409490585327, "logits/rejected": 1.185486912727356, "logps/chosen": -2.103950262069702, "logps/rejected": -6.802948951721191, "loss": 0.5761, "nll_loss": 0.566204845905304, "rewards/accuracies": 1.0, "rewards/chosen": -0.21039503812789917, "rewards/margins": 0.46989983320236206, "rewards/rejected": -0.6802948713302612, "step": 7150 }, { "epoch": 19.578370978781656, "grad_norm": 3.4102671146392822, "learning_rate": 2.0410958904109587e-08, "log_odds_chosen": 2.9564881324768066, "log_odds_ratio": -0.14947223663330078, "logits/chosen": 0.9310017228126526, "logits/rejected": 0.8221296072006226, "logps/chosen": -1.2658312320709229, "logps/rejected": -3.9242758750915527, "loss": 0.432, "nll_loss": 0.4170190095901489, "rewards/accuracies": 1.0, "rewards/chosen": -0.12658312916755676, "rewards/margins": 0.2658444941043854, "rewards/rejected": -0.39242762327194214, "step": 7151 }, { "epoch": 19.581108829568787, "grad_norm": 4.013147830963135, "learning_rate": 2.0273972602739726e-08, "log_odds_chosen": 4.635350227355957, "log_odds_ratio": -0.16806761920452118, "logits/chosen": 1.399428367614746, "logits/rejected": 1.4644229412078857, "logps/chosen": -2.0895516872406006, "logps/rejected": -6.553282737731934, "loss": 0.5242, "nll_loss": 0.5073632001876831, "rewards/accuracies": 1.0, "rewards/chosen": -0.20895516872406006, "rewards/margins": 0.4463731050491333, "rewards/rejected": -0.6553282737731934, "step": 7152 }, { "epoch": 19.583846680355922, "grad_norm": 3.8408679962158203, "learning_rate": 2.013698630136986e-08, "log_odds_chosen": 3.996128559112549, "log_odds_ratio": -0.22599169611930847, "logits/chosen": 0.9100486040115356, "logits/rejected": 0.8492261171340942, "logps/chosen": -2.122223377227783, "logps/rejected": -5.929224491119385, "loss": 0.5222, "nll_loss": 0.49963104724884033, "rewards/accuracies": 0.875, "rewards/chosen": -0.21222233772277832, "rewards/margins": 0.38070008158683777, "rewards/rejected": -0.5929224491119385, "step": 7153 }, { "epoch": 19.586584531143053, "grad_norm": 6.565371513366699, "learning_rate": 2e-08, "log_odds_chosen": 3.11732816696167, "log_odds_ratio": -0.2854066789150238, "logits/chosen": 0.9249787330627441, "logits/rejected": 0.9543591737747192, "logps/chosen": -2.152801990509033, "logps/rejected": -5.130288600921631, "loss": 0.7125, "nll_loss": 0.6839685440063477, "rewards/accuracies": 0.875, "rewards/chosen": -0.2152802050113678, "rewards/margins": 0.2977486848831177, "rewards/rejected": -0.5130288600921631, "step": 7154 }, { "epoch": 19.589322381930184, "grad_norm": 7.86923885345459, "learning_rate": 1.986301369863014e-08, "log_odds_chosen": 2.5541787147521973, "log_odds_ratio": -0.6155379414558411, "logits/chosen": 1.1911160945892334, "logits/rejected": 1.2284879684448242, "logps/chosen": -2.6191213130950928, "logps/rejected": -5.100542068481445, "loss": 0.6524, "nll_loss": 0.5908868908882141, "rewards/accuracies": 0.75, "rewards/chosen": -0.26191213726997375, "rewards/margins": 0.2481420487165451, "rewards/rejected": -0.5100542306900024, "step": 7155 }, { "epoch": 19.592060232717316, "grad_norm": 3.641420364379883, "learning_rate": 1.972602739726027e-08, "log_odds_chosen": 3.5277843475341797, "log_odds_ratio": -0.2747645378112793, "logits/chosen": 1.251876711845398, "logits/rejected": 1.2805821895599365, "logps/chosen": -1.396446704864502, "logps/rejected": -4.755537033081055, "loss": 0.4692, "nll_loss": 0.44167667627334595, "rewards/accuracies": 0.875, "rewards/chosen": -0.13964466750621796, "rewards/margins": 0.33590903878211975, "rewards/rejected": -0.4755536913871765, "step": 7156 }, { "epoch": 19.59479808350445, "grad_norm": 3.831568717956543, "learning_rate": 1.958904109589041e-08, "log_odds_chosen": 3.9823458194732666, "log_odds_ratio": -0.17580056190490723, "logits/chosen": 0.8887789249420166, "logits/rejected": 0.7883551716804504, "logps/chosen": -1.661146879196167, "logps/rejected": -5.465734958648682, "loss": 0.5647, "nll_loss": 0.5471439361572266, "rewards/accuracies": 1.0, "rewards/chosen": -0.1661146879196167, "rewards/margins": 0.3804588317871094, "rewards/rejected": -0.5465735197067261, "step": 7157 }, { "epoch": 19.59753593429158, "grad_norm": 9.294102668762207, "learning_rate": 1.9452054794520546e-08, "log_odds_chosen": 0.5450586080551147, "log_odds_ratio": -0.636459469795227, "logits/chosen": 1.1953589916229248, "logits/rejected": 1.0695602893829346, "logps/chosen": -2.072664260864258, "logps/rejected": -2.39555287361145, "loss": 0.55, "nll_loss": 0.4863877296447754, "rewards/accuracies": 0.875, "rewards/chosen": -0.2072664499282837, "rewards/margins": 0.032288871705532074, "rewards/rejected": -0.23955529928207397, "step": 7158 }, { "epoch": 19.600273785078713, "grad_norm": 12.955488204956055, "learning_rate": 1.9315068493150685e-08, "log_odds_chosen": 1.832595705986023, "log_odds_ratio": -0.3265725374221802, "logits/chosen": 1.163097620010376, "logits/rejected": 1.0084176063537598, "logps/chosen": -2.2769558429718018, "logps/rejected": -3.881031036376953, "loss": 0.4828, "nll_loss": 0.4501112103462219, "rewards/accuracies": 0.875, "rewards/chosen": -0.22769556939601898, "rewards/margins": 0.16040751338005066, "rewards/rejected": -0.38810309767723083, "step": 7159 }, { "epoch": 19.603011635865844, "grad_norm": 7.99985408782959, "learning_rate": 1.9178082191780824e-08, "log_odds_chosen": 1.2918869256973267, "log_odds_ratio": -0.4548966586589813, "logits/chosen": 1.004016637802124, "logits/rejected": 0.901168167591095, "logps/chosen": -1.939934492111206, "logps/rejected": -3.0545973777770996, "loss": 0.641, "nll_loss": 0.5954617857933044, "rewards/accuracies": 0.875, "rewards/chosen": -0.1939934343099594, "rewards/margins": 0.11146630346775055, "rewards/rejected": -0.30545973777770996, "step": 7160 }, { "epoch": 19.60574948665298, "grad_norm": 4.053231239318848, "learning_rate": 1.9041095890410956e-08, "log_odds_chosen": 2.8790972232818604, "log_odds_ratio": -0.23347243666648865, "logits/chosen": 0.7467212080955505, "logits/rejected": 0.7048839330673218, "logps/chosen": -2.134148597717285, "logps/rejected": -4.901519298553467, "loss": 0.6696, "nll_loss": 0.6462246179580688, "rewards/accuracies": 1.0, "rewards/chosen": -0.21341486275196075, "rewards/margins": 0.2767370641231537, "rewards/rejected": -0.49015191197395325, "step": 7161 }, { "epoch": 19.60848733744011, "grad_norm": 3.2842164039611816, "learning_rate": 1.8904109589041095e-08, "log_odds_chosen": 2.991417407989502, "log_odds_ratio": -0.253613144159317, "logits/chosen": 1.2438040971755981, "logits/rejected": 1.2268825769424438, "logps/chosen": -1.981580376625061, "logps/rejected": -4.846781253814697, "loss": 0.4917, "nll_loss": 0.4663453996181488, "rewards/accuracies": 0.875, "rewards/chosen": -0.19815804064273834, "rewards/margins": 0.2865200936794281, "rewards/rejected": -0.48467814922332764, "step": 7162 }, { "epoch": 19.61122518822724, "grad_norm": 5.926918029785156, "learning_rate": 1.8767123287671233e-08, "log_odds_chosen": 3.2435665130615234, "log_odds_ratio": -0.2422160506248474, "logits/chosen": 1.3428400754928589, "logits/rejected": 1.3225915431976318, "logps/chosen": -1.6419492959976196, "logps/rejected": -4.641016006469727, "loss": 0.5621, "nll_loss": 0.5379260778427124, "rewards/accuracies": 0.875, "rewards/chosen": -0.16419494152069092, "rewards/margins": 0.2999066412448883, "rewards/rejected": -0.46410155296325684, "step": 7163 }, { "epoch": 19.613963039014372, "grad_norm": 3.665649652481079, "learning_rate": 1.863013698630137e-08, "log_odds_chosen": 3.59047269821167, "log_odds_ratio": -0.2268039733171463, "logits/chosen": 1.009427785873413, "logits/rejected": 1.036219596862793, "logps/chosen": -2.01194429397583, "logps/rejected": -5.4140777587890625, "loss": 0.4946, "nll_loss": 0.47195005416870117, "rewards/accuracies": 0.875, "rewards/chosen": -0.20119443535804749, "rewards/margins": 0.3402133285999298, "rewards/rejected": -0.5414077639579773, "step": 7164 }, { "epoch": 19.616700889801507, "grad_norm": 3.8757858276367188, "learning_rate": 1.8493150684931508e-08, "log_odds_chosen": 1.7475202083587646, "log_odds_ratio": -0.3244132697582245, "logits/chosen": 1.410900592803955, "logits/rejected": 1.4552764892578125, "logps/chosen": -1.8847534656524658, "logps/rejected": -3.5400047302246094, "loss": 0.5427, "nll_loss": 0.5102657675743103, "rewards/accuracies": 0.875, "rewards/chosen": -0.1884753704071045, "rewards/margins": 0.16552510857582092, "rewards/rejected": -0.3540004789829254, "step": 7165 }, { "epoch": 19.619438740588638, "grad_norm": 9.778549194335938, "learning_rate": 1.8356164383561643e-08, "log_odds_chosen": 2.019655704498291, "log_odds_ratio": -0.3521539568901062, "logits/chosen": 1.2873691320419312, "logits/rejected": 1.2677152156829834, "logps/chosen": -2.165557622909546, "logps/rejected": -3.950850486755371, "loss": 0.5178, "nll_loss": 0.4825926125049591, "rewards/accuracies": 0.875, "rewards/chosen": -0.21655577421188354, "rewards/margins": 0.178529292345047, "rewards/rejected": -0.39508506655693054, "step": 7166 }, { "epoch": 19.62217659137577, "grad_norm": 3.779996395111084, "learning_rate": 1.821917808219178e-08, "log_odds_chosen": 3.187512159347534, "log_odds_ratio": -0.16366766393184662, "logits/chosen": 1.255210280418396, "logits/rejected": 1.3048981428146362, "logps/chosen": -2.303225040435791, "logps/rejected": -5.351161956787109, "loss": 0.5729, "nll_loss": 0.5564897656440735, "rewards/accuracies": 1.0, "rewards/chosen": -0.23032251000404358, "rewards/margins": 0.30479365587234497, "rewards/rejected": -0.5351161956787109, "step": 7167 }, { "epoch": 19.6249144421629, "grad_norm": 8.837516784667969, "learning_rate": 1.8082191780821918e-08, "log_odds_chosen": 0.8059967756271362, "log_odds_ratio": -0.6102526187896729, "logits/chosen": 1.0617185831069946, "logits/rejected": 0.9907657504081726, "logps/chosen": -1.716098666191101, "logps/rejected": -2.3850932121276855, "loss": 0.4846, "nll_loss": 0.4235377311706543, "rewards/accuracies": 0.875, "rewards/chosen": -0.17160987854003906, "rewards/margins": 0.06689944863319397, "rewards/rejected": -0.23850931227207184, "step": 7168 }, { "epoch": 19.627652292950035, "grad_norm": 4.601699352264404, "learning_rate": 1.7945205479452053e-08, "log_odds_chosen": 1.8421154022216797, "log_odds_ratio": -0.39653706550598145, "logits/chosen": 1.0150387287139893, "logits/rejected": 1.0661495923995972, "logps/chosen": -2.1937742233276367, "logps/rejected": -3.8608522415161133, "loss": 0.5697, "nll_loss": 0.530093789100647, "rewards/accuracies": 0.875, "rewards/chosen": -0.21937741339206696, "rewards/margins": 0.1667078584432602, "rewards/rejected": -0.38608527183532715, "step": 7169 }, { "epoch": 19.630390143737166, "grad_norm": 4.63654899597168, "learning_rate": 1.7808219178082192e-08, "log_odds_chosen": 2.368868827819824, "log_odds_ratio": -0.19529221951961517, "logits/chosen": 1.0702886581420898, "logits/rejected": 1.136147141456604, "logps/chosen": -2.5198514461517334, "logps/rejected": -4.789785385131836, "loss": 0.7704, "nll_loss": 0.7508776783943176, "rewards/accuracies": 1.0, "rewards/chosen": -0.2519851326942444, "rewards/margins": 0.2269934117794037, "rewards/rejected": -0.4789785146713257, "step": 7170 }, { "epoch": 19.633127994524298, "grad_norm": 3.5788733959198, "learning_rate": 1.7671232876712328e-08, "log_odds_chosen": 2.2133753299713135, "log_odds_ratio": -0.16004496812820435, "logits/chosen": 1.162106990814209, "logits/rejected": 1.1687116622924805, "logps/chosen": -1.5042357444763184, "logps/rejected": -3.4073917865753174, "loss": 0.4066, "nll_loss": 0.3905511498451233, "rewards/accuracies": 1.0, "rewards/chosen": -0.1504235714673996, "rewards/margins": 0.1903156191110611, "rewards/rejected": -0.3407391905784607, "step": 7171 }, { "epoch": 19.63586584531143, "grad_norm": 3.514875888824463, "learning_rate": 1.7534246575342463e-08, "log_odds_chosen": 1.9303481578826904, "log_odds_ratio": -0.2610442042350769, "logits/chosen": 1.1918797492980957, "logits/rejected": 1.115983247756958, "logps/chosen": -1.5722179412841797, "logps/rejected": -3.3049986362457275, "loss": 0.4551, "nll_loss": 0.42894840240478516, "rewards/accuracies": 0.875, "rewards/chosen": -0.15722180902957916, "rewards/margins": 0.1732780635356903, "rewards/rejected": -0.33049988746643066, "step": 7172 }, { "epoch": 19.638603696098563, "grad_norm": 5.590575218200684, "learning_rate": 1.7397260273972602e-08, "log_odds_chosen": 2.971519947052002, "log_odds_ratio": -0.14997611939907074, "logits/chosen": 1.1323227882385254, "logits/rejected": 1.1162217855453491, "logps/chosen": -1.9318642616271973, "logps/rejected": -4.652142524719238, "loss": 0.5504, "nll_loss": 0.5354444980621338, "rewards/accuracies": 1.0, "rewards/chosen": -0.1931864321231842, "rewards/margins": 0.2720278203487396, "rewards/rejected": -0.4652142822742462, "step": 7173 }, { "epoch": 19.641341546885695, "grad_norm": 3.8029258251190186, "learning_rate": 1.726027397260274e-08, "log_odds_chosen": 3.258185386657715, "log_odds_ratio": -0.18430092930793762, "logits/chosen": 1.0290067195892334, "logits/rejected": 1.0765390396118164, "logps/chosen": -1.9269611835479736, "logps/rejected": -4.9882612228393555, "loss": 0.5549, "nll_loss": 0.5364842414855957, "rewards/accuracies": 1.0, "rewards/chosen": -0.19269609451293945, "rewards/margins": 0.306130051612854, "rewards/rejected": -0.49882614612579346, "step": 7174 }, { "epoch": 19.644079397672826, "grad_norm": 5.289499759674072, "learning_rate": 1.7123287671232876e-08, "log_odds_chosen": 3.5296385288238525, "log_odds_ratio": -0.33071738481521606, "logits/chosen": 1.0817694664001465, "logits/rejected": 1.1766530275344849, "logps/chosen": -2.3755557537078857, "logps/rejected": -5.7650370597839355, "loss": 0.6595, "nll_loss": 0.6264009475708008, "rewards/accuracies": 0.75, "rewards/chosen": -0.23755556344985962, "rewards/margins": 0.33894816040992737, "rewards/rejected": -0.5765037536621094, "step": 7175 }, { "epoch": 19.64681724845996, "grad_norm": 5.365086078643799, "learning_rate": 1.6986301369863012e-08, "log_odds_chosen": 4.102860927581787, "log_odds_ratio": -0.15186616778373718, "logits/chosen": 1.0634583234786987, "logits/rejected": 1.0589394569396973, "logps/chosen": -2.387726068496704, "logps/rejected": -6.316622734069824, "loss": 0.58, "nll_loss": 0.5647653341293335, "rewards/accuracies": 0.875, "rewards/chosen": -0.23877260088920593, "rewards/margins": 0.39288970828056335, "rewards/rejected": -0.6316623091697693, "step": 7176 }, { "epoch": 19.64955509924709, "grad_norm": 3.6073081493377686, "learning_rate": 1.684931506849315e-08, "log_odds_chosen": 2.362399101257324, "log_odds_ratio": -0.2105730175971985, "logits/chosen": 1.0518370866775513, "logits/rejected": 1.0106518268585205, "logps/chosen": -1.9098856449127197, "logps/rejected": -4.07738733291626, "loss": 0.4879, "nll_loss": 0.466857373714447, "rewards/accuracies": 1.0, "rewards/chosen": -0.19098857045173645, "rewards/margins": 0.2167501449584961, "rewards/rejected": -0.40773868560791016, "step": 7177 }, { "epoch": 19.652292950034223, "grad_norm": 4.663349151611328, "learning_rate": 1.6712328767123286e-08, "log_odds_chosen": 1.9460850954055786, "log_odds_ratio": -0.22698524594306946, "logits/chosen": 0.88197922706604, "logits/rejected": 0.843295693397522, "logps/chosen": -2.4686689376831055, "logps/rejected": -4.237461090087891, "loss": 0.5319, "nll_loss": 0.5092132687568665, "rewards/accuracies": 1.0, "rewards/chosen": -0.24686689674854279, "rewards/margins": 0.17687919735908508, "rewards/rejected": -0.42374610900878906, "step": 7178 }, { "epoch": 19.655030800821354, "grad_norm": 3.772813081741333, "learning_rate": 1.6575342465753425e-08, "log_odds_chosen": 2.7016053199768066, "log_odds_ratio": -0.18869251012802124, "logits/chosen": 0.956234335899353, "logits/rejected": 0.9606582522392273, "logps/chosen": -1.8487823009490967, "logps/rejected": -4.343630313873291, "loss": 0.5739, "nll_loss": 0.5550785064697266, "rewards/accuracies": 1.0, "rewards/chosen": -0.18487823009490967, "rewards/margins": 0.2494848370552063, "rewards/rejected": -0.43436306715011597, "step": 7179 }, { "epoch": 19.65776865160849, "grad_norm": 13.94804573059082, "learning_rate": 1.643835616438356e-08, "log_odds_chosen": 1.7709707021713257, "log_odds_ratio": -0.6588014364242554, "logits/chosen": 1.323835849761963, "logits/rejected": 1.3139140605926514, "logps/chosen": -1.83542001247406, "logps/rejected": -3.4127469062805176, "loss": 0.5099, "nll_loss": 0.44399458169937134, "rewards/accuracies": 0.75, "rewards/chosen": -0.18354201316833496, "rewards/margins": 0.15773272514343262, "rewards/rejected": -0.3412747085094452, "step": 7180 }, { "epoch": 19.66050650239562, "grad_norm": 3.7089836597442627, "learning_rate": 1.6301369863013696e-08, "log_odds_chosen": 4.123527526855469, "log_odds_ratio": -0.08904968202114105, "logits/chosen": 1.3368372917175293, "logits/rejected": 1.3533284664154053, "logps/chosen": -1.5381968021392822, "logps/rejected": -5.367810249328613, "loss": 0.5257, "nll_loss": 0.5168150663375854, "rewards/accuracies": 1.0, "rewards/chosen": -0.15381968021392822, "rewards/margins": 0.38296133279800415, "rewards/rejected": -0.5367810726165771, "step": 7181 }, { "epoch": 19.66324435318275, "grad_norm": 4.791967391967773, "learning_rate": 1.6164383561643835e-08, "log_odds_chosen": 1.8807322978973389, "log_odds_ratio": -0.39784109592437744, "logits/chosen": 1.0662800073623657, "logits/rejected": 1.0213662385940552, "logps/chosen": -1.7549922466278076, "logps/rejected": -3.500365734100342, "loss": 0.6283, "nll_loss": 0.5885234475135803, "rewards/accuracies": 0.875, "rewards/chosen": -0.17549923062324524, "rewards/margins": 0.17453736066818237, "rewards/rejected": -0.35003662109375, "step": 7182 }, { "epoch": 19.665982203969882, "grad_norm": 6.232611179351807, "learning_rate": 1.602739726027397e-08, "log_odds_chosen": 2.1952013969421387, "log_odds_ratio": -0.33176153898239136, "logits/chosen": 0.9138143062591553, "logits/rejected": 0.859321117401123, "logps/chosen": -2.2553985118865967, "logps/rejected": -4.265624046325684, "loss": 0.5406, "nll_loss": 0.5073770880699158, "rewards/accuracies": 0.75, "rewards/chosen": -0.22553984820842743, "rewards/margins": 0.20102258026599884, "rewards/rejected": -0.42656242847442627, "step": 7183 }, { "epoch": 19.668720054757017, "grad_norm": 4.3807854652404785, "learning_rate": 1.589041095890411e-08, "log_odds_chosen": 2.3670618534088135, "log_odds_ratio": -0.18313080072402954, "logits/chosen": 1.1743030548095703, "logits/rejected": 1.2262600660324097, "logps/chosen": -2.180011510848999, "logps/rejected": -4.3961687088012695, "loss": 0.5715, "nll_loss": 0.5531764626502991, "rewards/accuracies": 1.0, "rewards/chosen": -0.21800115704536438, "rewards/margins": 0.22161570191383362, "rewards/rejected": -0.439616858959198, "step": 7184 }, { "epoch": 19.67145790554415, "grad_norm": 4.012603759765625, "learning_rate": 1.5753424657534248e-08, "log_odds_chosen": 2.640559196472168, "log_odds_ratio": -0.2187817096710205, "logits/chosen": 1.25113844871521, "logits/rejected": 1.1826035976409912, "logps/chosen": -1.6994879245758057, "logps/rejected": -4.170610427856445, "loss": 0.5243, "nll_loss": 0.5023755431175232, "rewards/accuracies": 1.0, "rewards/chosen": -0.16994880139827728, "rewards/margins": 0.24711225926876068, "rewards/rejected": -0.41706106066703796, "step": 7185 }, { "epoch": 19.67419575633128, "grad_norm": 3.299142837524414, "learning_rate": 1.5616438356164384e-08, "log_odds_chosen": 3.1073484420776367, "log_odds_ratio": -0.11737755686044693, "logits/chosen": 1.3754768371582031, "logits/rejected": 1.3674131631851196, "logps/chosen": -1.2651152610778809, "logps/rejected": -4.050609111785889, "loss": 0.4702, "nll_loss": 0.4584936797618866, "rewards/accuracies": 1.0, "rewards/chosen": -0.12651152908802032, "rewards/margins": 0.2785494029521942, "rewards/rejected": -0.40506094694137573, "step": 7186 }, { "epoch": 19.67693360711841, "grad_norm": 3.179239273071289, "learning_rate": 1.547945205479452e-08, "log_odds_chosen": 4.878505706787109, "log_odds_ratio": -0.13727551698684692, "logits/chosen": 0.9513028860092163, "logits/rejected": 0.9593175649642944, "logps/chosen": -1.5237520933151245, "logps/rejected": -6.170257568359375, "loss": 0.5279, "nll_loss": 0.5142173767089844, "rewards/accuracies": 1.0, "rewards/chosen": -0.1523752063512802, "rewards/margins": 0.46465057134628296, "rewards/rejected": -0.6170257329940796, "step": 7187 }, { "epoch": 19.679671457905545, "grad_norm": 4.98998498916626, "learning_rate": 1.5342465753424658e-08, "log_odds_chosen": 2.3343505859375, "log_odds_ratio": -0.4058475196361542, "logits/chosen": 1.0406397581100464, "logits/rejected": 1.1202802658081055, "logps/chosen": -2.3850390911102295, "logps/rejected": -4.561869144439697, "loss": 0.5538, "nll_loss": 0.5132045745849609, "rewards/accuracies": 0.875, "rewards/chosen": -0.23850391805171967, "rewards/margins": 0.21768297255039215, "rewards/rejected": -0.4561868906021118, "step": 7188 }, { "epoch": 19.682409308692677, "grad_norm": 3.4872586727142334, "learning_rate": 1.5205479452054794e-08, "log_odds_chosen": 3.467543601989746, "log_odds_ratio": -0.1542462706565857, "logits/chosen": 1.12069833278656, "logits/rejected": 1.1030892133712769, "logps/chosen": -1.5077742338180542, "logps/rejected": -4.721804618835449, "loss": 0.4684, "nll_loss": 0.45297569036483765, "rewards/accuracies": 1.0, "rewards/chosen": -0.1507774293422699, "rewards/margins": 0.3214030861854553, "rewards/rejected": -0.47218048572540283, "step": 7189 }, { "epoch": 19.685147159479808, "grad_norm": 3.97949481010437, "learning_rate": 1.5068493150684933e-08, "log_odds_chosen": 2.329376459121704, "log_odds_ratio": -0.31205084919929504, "logits/chosen": 1.2437312602996826, "logits/rejected": 1.2599945068359375, "logps/chosen": -2.32436203956604, "logps/rejected": -4.5717692375183105, "loss": 0.5799, "nll_loss": 0.5486634969711304, "rewards/accuracies": 0.75, "rewards/chosen": -0.23243622481822968, "rewards/margins": 0.22474071383476257, "rewards/rejected": -0.45717692375183105, "step": 7190 }, { "epoch": 19.68788501026694, "grad_norm": 4.139144420623779, "learning_rate": 1.4931506849315068e-08, "log_odds_chosen": 4.323626518249512, "log_odds_ratio": -0.13763496279716492, "logits/chosen": 1.0717941522598267, "logits/rejected": 1.0819088220596313, "logps/chosen": -1.8250949382781982, "logps/rejected": -5.9384613037109375, "loss": 0.5819, "nll_loss": 0.5681087970733643, "rewards/accuracies": 1.0, "rewards/chosen": -0.18250949680805206, "rewards/margins": 0.41133666038513184, "rewards/rejected": -0.5938461422920227, "step": 7191 }, { "epoch": 19.690622861054074, "grad_norm": 3.6408746242523193, "learning_rate": 1.4794520547945204e-08, "log_odds_chosen": 2.727611541748047, "log_odds_ratio": -0.2023218870162964, "logits/chosen": 1.0043878555297852, "logits/rejected": 0.984239935874939, "logps/chosen": -1.956634759902954, "logps/rejected": -4.486368179321289, "loss": 0.5298, "nll_loss": 0.5096057057380676, "rewards/accuracies": 0.875, "rewards/chosen": -0.1956634670495987, "rewards/margins": 0.252973347902298, "rewards/rejected": -0.44863682985305786, "step": 7192 }, { "epoch": 19.693360711841205, "grad_norm": 3.8557066917419434, "learning_rate": 1.4657534246575342e-08, "log_odds_chosen": 4.23485803604126, "log_odds_ratio": -0.1787089854478836, "logits/chosen": 1.029408574104309, "logits/rejected": 1.0305311679840088, "logps/chosen": -1.8286969661712646, "logps/rejected": -5.844227313995361, "loss": 0.5786, "nll_loss": 0.5606958866119385, "rewards/accuracies": 1.0, "rewards/chosen": -0.18286970257759094, "rewards/margins": 0.40155303478240967, "rewards/rejected": -0.584422767162323, "step": 7193 }, { "epoch": 19.696098562628336, "grad_norm": 3.385375738143921, "learning_rate": 1.452054794520548e-08, "log_odds_chosen": 4.026988506317139, "log_odds_ratio": -0.08832725882530212, "logits/chosen": 0.8222990036010742, "logits/rejected": 0.7101379632949829, "logps/chosen": -1.4562954902648926, "logps/rejected": -5.208293914794922, "loss": 0.5864, "nll_loss": 0.5775789618492126, "rewards/accuracies": 1.0, "rewards/chosen": -0.14562955498695374, "rewards/margins": 0.3751998543739319, "rewards/rejected": -0.520829439163208, "step": 7194 }, { "epoch": 19.698836413415467, "grad_norm": 3.6230177879333496, "learning_rate": 1.4383561643835615e-08, "log_odds_chosen": 2.798825740814209, "log_odds_ratio": -0.27741149067878723, "logits/chosen": 1.1479415893554688, "logits/rejected": 1.1809056997299194, "logps/chosen": -1.4988515377044678, "logps/rejected": -4.109342575073242, "loss": 0.4772, "nll_loss": 0.44941699504852295, "rewards/accuracies": 0.875, "rewards/chosen": -0.1498851627111435, "rewards/margins": 0.26104915142059326, "rewards/rejected": -0.41093429923057556, "step": 7195 }, { "epoch": 19.701574264202602, "grad_norm": 4.190192222595215, "learning_rate": 1.4246575342465752e-08, "log_odds_chosen": 5.922228813171387, "log_odds_ratio": -0.03042638674378395, "logits/chosen": 1.287193775177002, "logits/rejected": 1.3819303512573242, "logps/chosen": -2.2955307960510254, "logps/rejected": -8.014564514160156, "loss": 0.649, "nll_loss": 0.6459246873855591, "rewards/accuracies": 1.0, "rewards/chosen": -0.22955307364463806, "rewards/margins": 0.5719034671783447, "rewards/rejected": -0.8014565110206604, "step": 7196 }, { "epoch": 19.704312114989733, "grad_norm": 3.7236411571502686, "learning_rate": 1.4109589041095891e-08, "log_odds_chosen": 5.104928970336914, "log_odds_ratio": -0.08369739353656769, "logits/chosen": 0.9730305671691895, "logits/rejected": 0.9782357215881348, "logps/chosen": -1.9133909940719604, "logps/rejected": -6.861391067504883, "loss": 0.6871, "nll_loss": 0.6787756681442261, "rewards/accuracies": 1.0, "rewards/chosen": -0.19133910536766052, "rewards/margins": 0.49480003118515015, "rewards/rejected": -0.6861391067504883, "step": 7197 }, { "epoch": 19.707049965776864, "grad_norm": 3.1795759201049805, "learning_rate": 1.3972602739726027e-08, "log_odds_chosen": 3.552764654159546, "log_odds_ratio": -0.13931521773338318, "logits/chosen": 1.2662031650543213, "logits/rejected": 1.2942488193511963, "logps/chosen": -1.426702857017517, "logps/rejected": -4.666919231414795, "loss": 0.5412, "nll_loss": 0.5272386074066162, "rewards/accuracies": 1.0, "rewards/chosen": -0.14267028868198395, "rewards/margins": 0.3240216374397278, "rewards/rejected": -0.4666919410228729, "step": 7198 }, { "epoch": 19.709787816563995, "grad_norm": 7.337609767913818, "learning_rate": 1.3835616438356164e-08, "log_odds_chosen": 2.6885876655578613, "log_odds_ratio": -0.6537732481956482, "logits/chosen": 1.0267802476882935, "logits/rejected": 1.0310025215148926, "logps/chosen": -2.5689377784729004, "logps/rejected": -5.057462692260742, "loss": 0.7053, "nll_loss": 0.6399002075195312, "rewards/accuracies": 0.875, "rewards/chosen": -0.2568938136100769, "rewards/margins": 0.24885252118110657, "rewards/rejected": -0.5057463049888611, "step": 7199 }, { "epoch": 19.71252566735113, "grad_norm": 3.8380305767059326, "learning_rate": 1.36986301369863e-08, "log_odds_chosen": 1.9913735389709473, "log_odds_ratio": -0.24188154935836792, "logits/chosen": 1.399336576461792, "logits/rejected": 1.3620604276657104, "logps/chosen": -1.6284198760986328, "logps/rejected": -3.3853909969329834, "loss": 0.4977, "nll_loss": 0.4734714925289154, "rewards/accuracies": 1.0, "rewards/chosen": -0.16284199059009552, "rewards/margins": 0.17569708824157715, "rewards/rejected": -0.33853909373283386, "step": 7200 }, { "epoch": 19.71526351813826, "grad_norm": 3.4948172569274902, "learning_rate": 1.3561643835616438e-08, "log_odds_chosen": 2.836544990539551, "log_odds_ratio": -0.2208169847726822, "logits/chosen": 0.9934787750244141, "logits/rejected": 0.9605247974395752, "logps/chosen": -2.307013988494873, "logps/rejected": -4.953481674194336, "loss": 0.4745, "nll_loss": 0.4524654150009155, "rewards/accuracies": 0.875, "rewards/chosen": -0.23070141673088074, "rewards/margins": 0.2646467685699463, "rewards/rejected": -0.495348185300827, "step": 7201 }, { "epoch": 19.718001368925393, "grad_norm": 4.873879909515381, "learning_rate": 1.3424657534246575e-08, "log_odds_chosen": 2.004822015762329, "log_odds_ratio": -0.2719185948371887, "logits/chosen": 1.0771275758743286, "logits/rejected": 1.0834159851074219, "logps/chosen": -2.9301443099975586, "logps/rejected": -4.799407005310059, "loss": 0.5689, "nll_loss": 0.5417065024375916, "rewards/accuracies": 0.875, "rewards/chosen": -0.29301443696022034, "rewards/margins": 0.18692627549171448, "rewards/rejected": -0.4799407124519348, "step": 7202 }, { "epoch": 19.720739219712527, "grad_norm": 3.6364963054656982, "learning_rate": 1.3287671232876711e-08, "log_odds_chosen": 3.559783458709717, "log_odds_ratio": -0.09193840622901917, "logits/chosen": 0.9189107418060303, "logits/rejected": 0.915078341960907, "logps/chosen": -2.203366994857788, "logps/rejected": -5.595467567443848, "loss": 0.6823, "nll_loss": 0.6731212139129639, "rewards/accuracies": 1.0, "rewards/chosen": -0.2203367054462433, "rewards/margins": 0.3392100930213928, "rewards/rejected": -0.5595467686653137, "step": 7203 }, { "epoch": 19.72347707049966, "grad_norm": 3.7294278144836426, "learning_rate": 1.315068493150685e-08, "log_odds_chosen": 3.1522207260131836, "log_odds_ratio": -0.2136462777853012, "logits/chosen": 1.188326120376587, "logits/rejected": 1.17913818359375, "logps/chosen": -1.5069714784622192, "logps/rejected": -4.46898889541626, "loss": 0.446, "nll_loss": 0.4246494174003601, "rewards/accuracies": 1.0, "rewards/chosen": -0.15069715678691864, "rewards/margins": 0.29620176553726196, "rewards/rejected": -0.4468989074230194, "step": 7204 }, { "epoch": 19.72621492128679, "grad_norm": 5.9283013343811035, "learning_rate": 1.3013698630136985e-08, "log_odds_chosen": 2.7976694107055664, "log_odds_ratio": -0.20490685105323792, "logits/chosen": 1.0403201580047607, "logits/rejected": 1.0193302631378174, "logps/chosen": -2.5598771572113037, "logps/rejected": -5.199759483337402, "loss": 0.5671, "nll_loss": 0.5466190576553345, "rewards/accuracies": 1.0, "rewards/chosen": -0.2559877038002014, "rewards/margins": 0.26398828625679016, "rewards/rejected": -0.519976019859314, "step": 7205 }, { "epoch": 19.72895277207392, "grad_norm": 6.230846405029297, "learning_rate": 1.2876712328767123e-08, "log_odds_chosen": 3.8083534240722656, "log_odds_ratio": -0.16020654141902924, "logits/chosen": 1.1388401985168457, "logits/rejected": 1.113964319229126, "logps/chosen": -2.8175010681152344, "logps/rejected": -6.478718280792236, "loss": 0.6076, "nll_loss": 0.5915610194206238, "rewards/accuracies": 1.0, "rewards/chosen": -0.2817501127719879, "rewards/margins": 0.36612173914909363, "rewards/rejected": -0.6478718519210815, "step": 7206 }, { "epoch": 19.731690622861056, "grad_norm": 3.9148991107940674, "learning_rate": 1.273972602739726e-08, "log_odds_chosen": 3.428488254547119, "log_odds_ratio": -0.16877394914627075, "logits/chosen": 1.005766749382019, "logits/rejected": 1.0381760597229004, "logps/chosen": -1.3974635601043701, "logps/rejected": -4.514593601226807, "loss": 0.4774, "nll_loss": 0.46047720313072205, "rewards/accuracies": 1.0, "rewards/chosen": -0.13974636793136597, "rewards/margins": 0.3117130398750305, "rewards/rejected": -0.4514594078063965, "step": 7207 }, { "epoch": 19.734428473648187, "grad_norm": 5.751638889312744, "learning_rate": 1.2602739726027397e-08, "log_odds_chosen": 2.1478819847106934, "log_odds_ratio": -0.35372570157051086, "logits/chosen": 1.273303747177124, "logits/rejected": 1.2474079132080078, "logps/chosen": -1.948927402496338, "logps/rejected": -3.9205355644226074, "loss": 0.481, "nll_loss": 0.445654034614563, "rewards/accuracies": 0.875, "rewards/chosen": -0.1948927491903305, "rewards/margins": 0.19716079533100128, "rewards/rejected": -0.3920535743236542, "step": 7208 }, { "epoch": 19.737166324435318, "grad_norm": 3.43210506439209, "learning_rate": 1.2465753424657534e-08, "log_odds_chosen": 4.456203460693359, "log_odds_ratio": -0.04691515490412712, "logits/chosen": 1.3619836568832397, "logits/rejected": 1.3901065587997437, "logps/chosen": -1.952358365058899, "logps/rejected": -6.229046821594238, "loss": 0.5071, "nll_loss": 0.5024001598358154, "rewards/accuracies": 1.0, "rewards/chosen": -0.19523584842681885, "rewards/margins": 0.42766880989074707, "rewards/rejected": -0.6229046583175659, "step": 7209 }, { "epoch": 19.73990417522245, "grad_norm": 3.4351742267608643, "learning_rate": 1.232876712328767e-08, "log_odds_chosen": 2.2936959266662598, "log_odds_ratio": -0.17948314547538757, "logits/chosen": 1.234292984008789, "logits/rejected": 1.2001625299453735, "logps/chosen": -1.608947515487671, "logps/rejected": -3.7028496265411377, "loss": 0.5153, "nll_loss": 0.49738872051239014, "rewards/accuracies": 1.0, "rewards/chosen": -0.16089476644992828, "rewards/margins": 0.20939019322395325, "rewards/rejected": -0.3702849745750427, "step": 7210 }, { "epoch": 19.742642026009584, "grad_norm": 6.549415111541748, "learning_rate": 1.2191780821917807e-08, "log_odds_chosen": 1.4638407230377197, "log_odds_ratio": -0.321456640958786, "logits/chosen": 1.0741498470306396, "logits/rejected": 1.0524976253509521, "logps/chosen": -1.7002623081207275, "logps/rejected": -2.967111825942993, "loss": 0.4759, "nll_loss": 0.44375771284103394, "rewards/accuracies": 0.875, "rewards/chosen": -0.1700262427330017, "rewards/margins": 0.1266849786043167, "rewards/rejected": -0.2967112064361572, "step": 7211 }, { "epoch": 19.745379876796715, "grad_norm": 7.664013385772705, "learning_rate": 1.2054794520547946e-08, "log_odds_chosen": 2.0333914756774902, "log_odds_ratio": -0.36221376061439514, "logits/chosen": 1.041266918182373, "logits/rejected": 0.9783852100372314, "logps/chosen": -1.6663979291915894, "logps/rejected": -3.350477695465088, "loss": 0.4429, "nll_loss": 0.4066331386566162, "rewards/accuracies": 0.875, "rewards/chosen": -0.1666397750377655, "rewards/margins": 0.16840800642967224, "rewards/rejected": -0.33504778146743774, "step": 7212 }, { "epoch": 19.748117727583846, "grad_norm": 3.865262031555176, "learning_rate": 1.1917808219178081e-08, "log_odds_chosen": 3.276150941848755, "log_odds_ratio": -0.13594470918178558, "logits/chosen": 1.066885232925415, "logits/rejected": 1.0716440677642822, "logps/chosen": -1.320709466934204, "logps/rejected": -4.313959121704102, "loss": 0.4202, "nll_loss": 0.4066285192966461, "rewards/accuracies": 1.0, "rewards/chosen": -0.13207094371318817, "rewards/margins": 0.29932495951652527, "rewards/rejected": -0.431395947933197, "step": 7213 }, { "epoch": 19.750855578370977, "grad_norm": 3.6853511333465576, "learning_rate": 1.1780821917808218e-08, "log_odds_chosen": 2.764101982116699, "log_odds_ratio": -0.10845980793237686, "logits/chosen": 1.2223460674285889, "logits/rejected": 1.2481379508972168, "logps/chosen": -1.7178127765655518, "logps/rejected": -4.135461807250977, "loss": 0.4248, "nll_loss": 0.4139254093170166, "rewards/accuracies": 1.0, "rewards/chosen": -0.17178130149841309, "rewards/margins": 0.24176491796970367, "rewards/rejected": -0.41354620456695557, "step": 7214 }, { "epoch": 19.753593429158112, "grad_norm": 3.6382651329040527, "learning_rate": 1.1643835616438356e-08, "log_odds_chosen": 3.5771169662475586, "log_odds_ratio": -0.09400928020477295, "logits/chosen": 1.128441333770752, "logits/rejected": 1.1957478523254395, "logps/chosen": -1.8436509370803833, "logps/rejected": -5.2166900634765625, "loss": 0.5889, "nll_loss": 0.5795260071754456, "rewards/accuracies": 1.0, "rewards/chosen": -0.18436509370803833, "rewards/margins": 0.33730387687683105, "rewards/rejected": -0.5216690301895142, "step": 7215 }, { "epoch": 19.756331279945243, "grad_norm": 3.684100866317749, "learning_rate": 1.1506849315068493e-08, "log_odds_chosen": 2.5027260780334473, "log_odds_ratio": -0.19954822957515717, "logits/chosen": 1.3123366832733154, "logits/rejected": 1.3465698957443237, "logps/chosen": -1.8199396133422852, "logps/rejected": -4.195024490356445, "loss": 0.5193, "nll_loss": 0.49932676553726196, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819939762353897, "rewards/margins": 0.23750846087932587, "rewards/rejected": -0.4195024371147156, "step": 7216 }, { "epoch": 19.759069130732374, "grad_norm": 3.846357583999634, "learning_rate": 1.136986301369863e-08, "log_odds_chosen": 4.558793544769287, "log_odds_ratio": -0.11420583724975586, "logits/chosen": 1.1671528816223145, "logits/rejected": 1.1610214710235596, "logps/chosen": -1.7926533222198486, "logps/rejected": -6.162398815155029, "loss": 0.5191, "nll_loss": 0.5077146291732788, "rewards/accuracies": 1.0, "rewards/chosen": -0.1792653501033783, "rewards/margins": 0.43697455525398254, "rewards/rejected": -0.6162399649620056, "step": 7217 }, { "epoch": 19.761806981519506, "grad_norm": 3.7943692207336426, "learning_rate": 1.1232876712328766e-08, "log_odds_chosen": 1.9191851615905762, "log_odds_ratio": -0.3248904347419739, "logits/chosen": 1.3183386325836182, "logits/rejected": 1.3512258529663086, "logps/chosen": -2.0885117053985596, "logps/rejected": -3.8090102672576904, "loss": 0.5062, "nll_loss": 0.4736926257610321, "rewards/accuracies": 0.875, "rewards/chosen": -0.208851158618927, "rewards/margins": 0.1720498502254486, "rewards/rejected": -0.380901038646698, "step": 7218 }, { "epoch": 19.76454483230664, "grad_norm": 3.5690345764160156, "learning_rate": 1.1095890410958904e-08, "log_odds_chosen": 2.113208055496216, "log_odds_ratio": -0.243584543466568, "logits/chosen": 1.3339111804962158, "logits/rejected": 1.26418936252594, "logps/chosen": -1.6730022430419922, "logps/rejected": -3.605283260345459, "loss": 0.4742, "nll_loss": 0.4498020112514496, "rewards/accuracies": 1.0, "rewards/chosen": -0.16730022430419922, "rewards/margins": 0.1932280957698822, "rewards/rejected": -0.3605283498764038, "step": 7219 }, { "epoch": 19.76728268309377, "grad_norm": 3.3614423274993896, "learning_rate": 1.095890410958904e-08, "log_odds_chosen": 3.2461752891540527, "log_odds_ratio": -0.14991706609725952, "logits/chosen": 1.287601113319397, "logits/rejected": 1.2120919227600098, "logps/chosen": -1.8305693864822388, "logps/rejected": -4.914379596710205, "loss": 0.4978, "nll_loss": 0.48284292221069336, "rewards/accuracies": 1.0, "rewards/chosen": -0.18305695056915283, "rewards/margins": 0.30838102102279663, "rewards/rejected": -0.49143797159194946, "step": 7220 }, { "epoch": 19.770020533880903, "grad_norm": 6.770063400268555, "learning_rate": 1.0821917808219177e-08, "log_odds_chosen": 3.9480910301208496, "log_odds_ratio": -0.3421764075756073, "logits/chosen": 1.0747606754302979, "logits/rejected": 1.0235909223556519, "logps/chosen": -2.988058567047119, "logps/rejected": -6.771615982055664, "loss": 0.8157, "nll_loss": 0.781517505645752, "rewards/accuracies": 0.875, "rewards/chosen": -0.298805832862854, "rewards/margins": 0.3783557415008545, "rewards/rejected": -0.6771615743637085, "step": 7221 }, { "epoch": 19.772758384668034, "grad_norm": 3.458974838256836, "learning_rate": 1.0684931506849316e-08, "log_odds_chosen": 2.389230966567993, "log_odds_ratio": -0.1727321445941925, "logits/chosen": 1.113603949546814, "logits/rejected": 1.0772405862808228, "logps/chosen": -1.5516440868377686, "logps/rejected": -3.695746898651123, "loss": 0.4716, "nll_loss": 0.45430752635002136, "rewards/accuracies": 1.0, "rewards/chosen": -0.15516440570354462, "rewards/margins": 0.21441027522087097, "rewards/rejected": -0.3695746660232544, "step": 7222 }, { "epoch": 19.77549623545517, "grad_norm": 3.198988199234009, "learning_rate": 1.0547945205479451e-08, "log_odds_chosen": 4.151885032653809, "log_odds_ratio": -0.1363324373960495, "logits/chosen": 0.9666322469711304, "logits/rejected": 0.973132848739624, "logps/chosen": -2.528442144393921, "logps/rejected": -6.519156455993652, "loss": 0.5442, "nll_loss": 0.5305467844009399, "rewards/accuracies": 1.0, "rewards/chosen": -0.2528442144393921, "rewards/margins": 0.39907142519950867, "rewards/rejected": -0.6519156694412231, "step": 7223 }, { "epoch": 19.7782340862423, "grad_norm": 6.901214122772217, "learning_rate": 1.0410958904109589e-08, "log_odds_chosen": 0.9940023422241211, "log_odds_ratio": -0.35515379905700684, "logits/chosen": 1.2858264446258545, "logits/rejected": 1.2328267097473145, "logps/chosen": -1.6439332962036133, "logps/rejected": -2.4400289058685303, "loss": 0.4614, "nll_loss": 0.42585164308547974, "rewards/accuracies": 0.875, "rewards/chosen": -0.1643933355808258, "rewards/margins": 0.07960955798625946, "rewards/rejected": -0.24400289356708527, "step": 7224 }, { "epoch": 19.78097193702943, "grad_norm": 7.388206481933594, "learning_rate": 1.0273972602739724e-08, "log_odds_chosen": 2.9366683959960938, "log_odds_ratio": -0.3963209092617035, "logits/chosen": 1.2503769397735596, "logits/rejected": 1.2564990520477295, "logps/chosen": -2.0763773918151855, "logps/rejected": -4.870965480804443, "loss": 0.6115, "nll_loss": 0.5718681216239929, "rewards/accuracies": 0.875, "rewards/chosen": -0.207637757062912, "rewards/margins": 0.27945882081985474, "rewards/rejected": -0.4870965778827667, "step": 7225 }, { "epoch": 19.783709787816562, "grad_norm": 3.8769543170928955, "learning_rate": 1.0136986301369863e-08, "log_odds_chosen": 2.8692667484283447, "log_odds_ratio": -0.1377491056919098, "logits/chosen": 1.2315469980239868, "logits/rejected": 1.2743737697601318, "logps/chosen": -1.6922292709350586, "logps/rejected": -4.283581733703613, "loss": 0.4682, "nll_loss": 0.4544055163860321, "rewards/accuracies": 1.0, "rewards/chosen": -0.16922293603420258, "rewards/margins": 0.25913527607917786, "rewards/rejected": -0.42835819721221924, "step": 7226 }, { "epoch": 19.786447638603697, "grad_norm": 3.9804928302764893, "learning_rate": 1e-08, "log_odds_chosen": 2.41667103767395, "log_odds_ratio": -0.21244294941425323, "logits/chosen": 1.084878921508789, "logits/rejected": 1.1608691215515137, "logps/chosen": -2.116760492324829, "logps/rejected": -4.418650150299072, "loss": 0.5059, "nll_loss": 0.48468217253685, "rewards/accuracies": 0.875, "rewards/chosen": -0.21167606115341187, "rewards/margins": 0.23018893599510193, "rewards/rejected": -0.4418650269508362, "step": 7227 }, { "epoch": 19.789185489390828, "grad_norm": 4.414544105529785, "learning_rate": 9.863013698630136e-09, "log_odds_chosen": 3.651089668273926, "log_odds_ratio": -0.21914082765579224, "logits/chosen": 1.102997899055481, "logits/rejected": 1.172226905822754, "logps/chosen": -2.2297425270080566, "logps/rejected": -5.680880546569824, "loss": 0.5457, "nll_loss": 0.5237457752227783, "rewards/accuracies": 1.0, "rewards/chosen": -0.2229742407798767, "rewards/margins": 0.34511375427246094, "rewards/rejected": -0.5680880546569824, "step": 7228 }, { "epoch": 19.79192334017796, "grad_norm": 3.207897186279297, "learning_rate": 9.726027397260273e-09, "log_odds_chosen": 5.219306945800781, "log_odds_ratio": -0.08362746238708496, "logits/chosen": 1.0797457695007324, "logits/rejected": 1.0593032836914062, "logps/chosen": -1.6195403337478638, "logps/rejected": -6.588319301605225, "loss": 0.4958, "nll_loss": 0.48745566606521606, "rewards/accuracies": 1.0, "rewards/chosen": -0.16195404529571533, "rewards/margins": 0.49687790870666504, "rewards/rejected": -0.6588319540023804, "step": 7229 }, { "epoch": 19.794661190965094, "grad_norm": 3.942491292953491, "learning_rate": 9.589041095890412e-09, "log_odds_chosen": 3.4372682571411133, "log_odds_ratio": -0.16401457786560059, "logits/chosen": 1.1440958976745605, "logits/rejected": 1.0477235317230225, "logps/chosen": -1.9357552528381348, "logps/rejected": -5.174808979034424, "loss": 0.4628, "nll_loss": 0.4464375376701355, "rewards/accuracies": 0.875, "rewards/chosen": -0.19357553124427795, "rewards/margins": 0.323905348777771, "rewards/rejected": -0.5174808502197266, "step": 7230 }, { "epoch": 19.797399041752225, "grad_norm": 3.5848991870880127, "learning_rate": 9.452054794520547e-09, "log_odds_chosen": 5.128131866455078, "log_odds_ratio": -0.07239759713411331, "logits/chosen": 1.425934076309204, "logits/rejected": 1.4443367719650269, "logps/chosen": -1.8678169250488281, "logps/rejected": -6.800636291503906, "loss": 0.5627, "nll_loss": 0.5555066466331482, "rewards/accuracies": 1.0, "rewards/chosen": -0.18678168952465057, "rewards/margins": 0.49328190088272095, "rewards/rejected": -0.6800636053085327, "step": 7231 }, { "epoch": 19.800136892539356, "grad_norm": 5.322134971618652, "learning_rate": 9.315068493150684e-09, "log_odds_chosen": 2.634305715560913, "log_odds_ratio": -0.2650206983089447, "logits/chosen": 1.1505882740020752, "logits/rejected": 1.1734371185302734, "logps/chosen": -2.4540951251983643, "logps/rejected": -4.971708297729492, "loss": 0.5178, "nll_loss": 0.49126335978507996, "rewards/accuracies": 0.875, "rewards/chosen": -0.2454095035791397, "rewards/margins": 0.2517613172531128, "rewards/rejected": -0.4971708357334137, "step": 7232 }, { "epoch": 19.802874743326488, "grad_norm": 4.055145263671875, "learning_rate": 9.178082191780822e-09, "log_odds_chosen": 4.83568811416626, "log_odds_ratio": -0.11753536760807037, "logits/chosen": 1.091944694519043, "logits/rejected": 1.166603684425354, "logps/chosen": -2.382922887802124, "logps/rejected": -7.079395294189453, "loss": 0.6397, "nll_loss": 0.6279339790344238, "rewards/accuracies": 1.0, "rewards/chosen": -0.23829227685928345, "rewards/margins": 0.46964722871780396, "rewards/rejected": -0.7079395055770874, "step": 7233 }, { "epoch": 19.805612594113622, "grad_norm": 13.900179862976074, "learning_rate": 9.041095890410959e-09, "log_odds_chosen": 1.7579066753387451, "log_odds_ratio": -0.6685770750045776, "logits/chosen": 1.2242579460144043, "logits/rejected": 1.1872326135635376, "logps/chosen": -3.0054664611816406, "logps/rejected": -4.600306510925293, "loss": 0.6998, "nll_loss": 0.632920503616333, "rewards/accuracies": 0.75, "rewards/chosen": -0.30054664611816406, "rewards/margins": 0.15948399901390076, "rewards/rejected": -0.4600306451320648, "step": 7234 }, { "epoch": 19.808350444900753, "grad_norm": 3.5555293560028076, "learning_rate": 8.904109589041096e-09, "log_odds_chosen": 1.5774743556976318, "log_odds_ratio": -0.32326191663742065, "logits/chosen": 0.9324197769165039, "logits/rejected": 0.8765368461608887, "logps/chosen": -2.468308687210083, "logps/rejected": -3.8955061435699463, "loss": 0.5682, "nll_loss": 0.5359001755714417, "rewards/accuracies": 0.75, "rewards/chosen": -0.24683088064193726, "rewards/margins": 0.14271971583366394, "rewards/rejected": -0.3895506262779236, "step": 7235 }, { "epoch": 19.811088295687885, "grad_norm": 3.4801230430603027, "learning_rate": 8.767123287671232e-09, "log_odds_chosen": 2.071406841278076, "log_odds_ratio": -0.2018866389989853, "logits/chosen": 1.0783672332763672, "logits/rejected": 1.0109517574310303, "logps/chosen": -1.5856201648712158, "logps/rejected": -3.449620246887207, "loss": 0.4653, "nll_loss": 0.4451347589492798, "rewards/accuracies": 1.0, "rewards/chosen": -0.15856203436851501, "rewards/margins": 0.18639998137950897, "rewards/rejected": -0.3449620008468628, "step": 7236 }, { "epoch": 19.813826146475016, "grad_norm": 3.88257098197937, "learning_rate": 8.63013698630137e-09, "log_odds_chosen": 1.6419261693954468, "log_odds_ratio": -0.28380829095840454, "logits/chosen": 1.0442438125610352, "logits/rejected": 0.9586104154586792, "logps/chosen": -1.759657859802246, "logps/rejected": -3.2269279956817627, "loss": 0.5568, "nll_loss": 0.5284498333930969, "rewards/accuracies": 0.875, "rewards/chosen": -0.1759657859802246, "rewards/margins": 0.1467270404100418, "rewards/rejected": -0.3226928114891052, "step": 7237 }, { "epoch": 19.81656399726215, "grad_norm": 3.507063865661621, "learning_rate": 8.493150684931506e-09, "log_odds_chosen": 2.456197738647461, "log_odds_ratio": -0.1593344807624817, "logits/chosen": 0.7937654256820679, "logits/rejected": 0.7382034659385681, "logps/chosen": -1.4908345937728882, "logps/rejected": -3.585862636566162, "loss": 0.5338, "nll_loss": 0.5178187489509583, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490834653377533, "rewards/margins": 0.20950281620025635, "rewards/rejected": -0.35858625173568726, "step": 7238 }, { "epoch": 19.81930184804928, "grad_norm": 5.3525495529174805, "learning_rate": 8.356164383561643e-09, "log_odds_chosen": 2.2248170375823975, "log_odds_ratio": -0.1502583920955658, "logits/chosen": 1.093251347541809, "logits/rejected": 1.0561381578445435, "logps/chosen": -1.8190076351165771, "logps/rejected": -3.8078761100769043, "loss": 0.4899, "nll_loss": 0.4748322069644928, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819007694721222, "rewards/margins": 0.19888687133789062, "rewards/rejected": -0.38078758120536804, "step": 7239 }, { "epoch": 19.822039698836413, "grad_norm": 3.250351905822754, "learning_rate": 8.21917808219178e-09, "log_odds_chosen": 2.7959156036376953, "log_odds_ratio": -0.15052448213100433, "logits/chosen": 0.9503363966941833, "logits/rejected": 0.8785910606384277, "logps/chosen": -1.9126927852630615, "logps/rejected": -4.482794284820557, "loss": 0.4795, "nll_loss": 0.46442630887031555, "rewards/accuracies": 1.0, "rewards/chosen": -0.19126927852630615, "rewards/margins": 0.25701016187667847, "rewards/rejected": -0.4482794404029846, "step": 7240 }, { "epoch": 19.824777549623544, "grad_norm": 4.053317546844482, "learning_rate": 8.082191780821918e-09, "log_odds_chosen": 2.0108304023742676, "log_odds_ratio": -0.25985974073410034, "logits/chosen": 1.1368211507797241, "logits/rejected": 1.046691656112671, "logps/chosen": -1.621055006980896, "logps/rejected": -3.465517044067383, "loss": 0.5013, "nll_loss": 0.47526979446411133, "rewards/accuracies": 1.0, "rewards/chosen": -0.1621055006980896, "rewards/margins": 0.18444620072841644, "rewards/rejected": -0.34655171632766724, "step": 7241 }, { "epoch": 19.82751540041068, "grad_norm": 3.453408718109131, "learning_rate": 7.945205479452055e-09, "log_odds_chosen": 2.6090686321258545, "log_odds_ratio": -0.2104724496603012, "logits/chosen": 1.2484700679779053, "logits/rejected": 1.3118336200714111, "logps/chosen": -1.7398951053619385, "logps/rejected": -4.0254011154174805, "loss": 0.4748, "nll_loss": 0.45374995470046997, "rewards/accuracies": 1.0, "rewards/chosen": -0.17398950457572937, "rewards/margins": 0.22855067253112793, "rewards/rejected": -0.4025401771068573, "step": 7242 }, { "epoch": 19.83025325119781, "grad_norm": 3.3148045539855957, "learning_rate": 7.808219178082192e-09, "log_odds_chosen": 3.336221694946289, "log_odds_ratio": -0.18866056203842163, "logits/chosen": 0.9957576990127563, "logits/rejected": 0.9847691059112549, "logps/chosen": -1.756179928779602, "logps/rejected": -4.8781890869140625, "loss": 0.4434, "nll_loss": 0.4245210289955139, "rewards/accuracies": 1.0, "rewards/chosen": -0.1756179928779602, "rewards/margins": 0.3122009038925171, "rewards/rejected": -0.4878188967704773, "step": 7243 }, { "epoch": 19.83299110198494, "grad_norm": 4.480233669281006, "learning_rate": 7.671232876712329e-09, "log_odds_chosen": 2.3165507316589355, "log_odds_ratio": -0.21208101511001587, "logits/chosen": 1.4504247903823853, "logits/rejected": 1.471195936203003, "logps/chosen": -2.883744716644287, "logps/rejected": -5.100954055786133, "loss": 0.5651, "nll_loss": 0.5439315438270569, "rewards/accuracies": 1.0, "rewards/chosen": -0.28837448358535767, "rewards/margins": 0.22172094881534576, "rewards/rejected": -0.510095477104187, "step": 7244 }, { "epoch": 19.835728952772072, "grad_norm": 3.851008176803589, "learning_rate": 7.534246575342466e-09, "log_odds_chosen": 2.80063796043396, "log_odds_ratio": -0.3380850553512573, "logits/chosen": 1.0712538957595825, "logits/rejected": 1.0073307752609253, "logps/chosen": -2.017794609069824, "logps/rejected": -4.658817768096924, "loss": 0.5638, "nll_loss": 0.5299966931343079, "rewards/accuracies": 0.875, "rewards/chosen": -0.20177945494651794, "rewards/margins": 0.26410233974456787, "rewards/rejected": -0.4658817946910858, "step": 7245 }, { "epoch": 19.838466803559207, "grad_norm": 4.424108028411865, "learning_rate": 7.397260273972602e-09, "log_odds_chosen": 1.854034185409546, "log_odds_ratio": -0.2899912893772125, "logits/chosen": 1.0421456098556519, "logits/rejected": 0.908301055431366, "logps/chosen": -1.8469129800796509, "logps/rejected": -3.5224952697753906, "loss": 0.4737, "nll_loss": 0.44472619891166687, "rewards/accuracies": 0.875, "rewards/chosen": -0.18469130992889404, "rewards/margins": 0.1675582230091095, "rewards/rejected": -0.35224953293800354, "step": 7246 }, { "epoch": 19.84120465434634, "grad_norm": 4.03289794921875, "learning_rate": 7.26027397260274e-09, "log_odds_chosen": 1.507552981376648, "log_odds_ratio": -0.2592211067676544, "logits/chosen": 1.2965973615646362, "logits/rejected": 1.2918701171875, "logps/chosen": -1.9651480913162231, "logps/rejected": -3.312211036682129, "loss": 0.4629, "nll_loss": 0.4369487762451172, "rewards/accuracies": 1.0, "rewards/chosen": -0.1965148150920868, "rewards/margins": 0.1347062885761261, "rewards/rejected": -0.3312210738658905, "step": 7247 }, { "epoch": 19.84394250513347, "grad_norm": 3.583848476409912, "learning_rate": 7.123287671232876e-09, "log_odds_chosen": 3.5496397018432617, "log_odds_ratio": -0.1516294926404953, "logits/chosen": 0.9180367588996887, "logits/rejected": 0.8939035534858704, "logps/chosen": -1.5474767684936523, "logps/rejected": -4.847307205200195, "loss": 0.4387, "nll_loss": 0.4235609769821167, "rewards/accuracies": 1.0, "rewards/chosen": -0.15474767982959747, "rewards/margins": 0.32998308539390564, "rewards/rejected": -0.4847307503223419, "step": 7248 }, { "epoch": 19.8466803559206, "grad_norm": 13.345240592956543, "learning_rate": 6.986301369863013e-09, "log_odds_chosen": 0.7698322534561157, "log_odds_ratio": -0.5758113861083984, "logits/chosen": 1.0809892416000366, "logits/rejected": 1.0181082487106323, "logps/chosen": -3.165553092956543, "logps/rejected": -3.834486484527588, "loss": 0.622, "nll_loss": 0.5644558668136597, "rewards/accuracies": 0.75, "rewards/chosen": -0.31655532121658325, "rewards/margins": 0.0668933093547821, "rewards/rejected": -0.38344863057136536, "step": 7249 }, { "epoch": 19.849418206707735, "grad_norm": 3.461509943008423, "learning_rate": 6.84931506849315e-09, "log_odds_chosen": 3.7976269721984863, "log_odds_ratio": -0.09838387370109558, "logits/chosen": 0.9668334722518921, "logits/rejected": 1.0074126720428467, "logps/chosen": -2.09598445892334, "logps/rejected": -5.706698417663574, "loss": 0.562, "nll_loss": 0.5521237850189209, "rewards/accuracies": 1.0, "rewards/chosen": -0.20959845185279846, "rewards/margins": 0.3610714375972748, "rewards/rejected": -0.5706698894500732, "step": 7250 }, { "epoch": 19.852156057494867, "grad_norm": 5.299224853515625, "learning_rate": 6.712328767123288e-09, "log_odds_chosen": 2.9356813430786133, "log_odds_ratio": -0.23332077264785767, "logits/chosen": 0.9667234420776367, "logits/rejected": 0.918856143951416, "logps/chosen": -1.9868495464324951, "logps/rejected": -4.779353618621826, "loss": 0.6094, "nll_loss": 0.5860971808433533, "rewards/accuracies": 1.0, "rewards/chosen": -0.198684960603714, "rewards/margins": 0.27925044298171997, "rewards/rejected": -0.4779353737831116, "step": 7251 }, { "epoch": 19.854893908281998, "grad_norm": 4.067467212677002, "learning_rate": 6.575342465753425e-09, "log_odds_chosen": 3.678344249725342, "log_odds_ratio": -0.18772763013839722, "logits/chosen": 1.0811105966567993, "logits/rejected": 1.0949879884719849, "logps/chosen": -1.7015910148620605, "logps/rejected": -5.206438064575195, "loss": 0.4519, "nll_loss": 0.4331551194190979, "rewards/accuracies": 1.0, "rewards/chosen": -0.17015908658504486, "rewards/margins": 0.3504846692085266, "rewards/rejected": -0.5206437706947327, "step": 7252 }, { "epoch": 19.85763175906913, "grad_norm": 3.3114495277404785, "learning_rate": 6.438356164383561e-09, "log_odds_chosen": 3.5426902770996094, "log_odds_ratio": -0.1369187831878662, "logits/chosen": 0.9416603446006775, "logits/rejected": 0.99493408203125, "logps/chosen": -1.7496731281280518, "logps/rejected": -5.047266483306885, "loss": 0.4316, "nll_loss": 0.41789528727531433, "rewards/accuracies": 1.0, "rewards/chosen": -0.17496731877326965, "rewards/margins": 0.3297593593597412, "rewards/rejected": -0.5047266483306885, "step": 7253 }, { "epoch": 19.860369609856264, "grad_norm": 5.16355562210083, "learning_rate": 6.3013698630136985e-09, "log_odds_chosen": 4.200784206390381, "log_odds_ratio": -0.14926809072494507, "logits/chosen": 1.2645370960235596, "logits/rejected": 1.2193610668182373, "logps/chosen": -1.8026431798934937, "logps/rejected": -5.792152404785156, "loss": 0.5667, "nll_loss": 0.5518003106117249, "rewards/accuracies": 1.0, "rewards/chosen": -0.18026432394981384, "rewards/margins": 0.39895087480545044, "rewards/rejected": -0.5792152881622314, "step": 7254 }, { "epoch": 19.863107460643395, "grad_norm": 3.303992986679077, "learning_rate": 6.164383561643835e-09, "log_odds_chosen": 2.0997674465179443, "log_odds_ratio": -0.18196049332618713, "logits/chosen": 0.9299967288970947, "logits/rejected": 0.9066504240036011, "logps/chosen": -1.7265251874923706, "logps/rejected": -3.668130874633789, "loss": 0.4451, "nll_loss": 0.42695316672325134, "rewards/accuracies": 1.0, "rewards/chosen": -0.17265251278877258, "rewards/margins": 0.1941605806350708, "rewards/rejected": -0.36681312322616577, "step": 7255 }, { "epoch": 19.865845311430526, "grad_norm": 3.54799222946167, "learning_rate": 6.027397260273973e-09, "log_odds_chosen": 4.2768449783325195, "log_odds_ratio": -0.1438974291086197, "logits/chosen": 1.5085541009902954, "logits/rejected": 1.4586716890335083, "logps/chosen": -1.05433189868927, "logps/rejected": -4.771288871765137, "loss": 0.4716, "nll_loss": 0.45724916458129883, "rewards/accuracies": 1.0, "rewards/chosen": -0.10543318837881088, "rewards/margins": 0.37169569730758667, "rewards/rejected": -0.47712892293930054, "step": 7256 }, { "epoch": 19.86858316221766, "grad_norm": 10.73437213897705, "learning_rate": 5.890410958904109e-09, "log_odds_chosen": 2.1255550384521484, "log_odds_ratio": -0.421453595161438, "logits/chosen": 1.0001025199890137, "logits/rejected": 0.9873185157775879, "logps/chosen": -2.365473508834839, "logps/rejected": -4.4071784019470215, "loss": 0.5595, "nll_loss": 0.51730877161026, "rewards/accuracies": 0.75, "rewards/chosen": -0.23654736578464508, "rewards/margins": 0.20417043566703796, "rewards/rejected": -0.44071781635284424, "step": 7257 }, { "epoch": 19.871321013004792, "grad_norm": 3.7202179431915283, "learning_rate": 5.753424657534246e-09, "log_odds_chosen": 2.6534695625305176, "log_odds_ratio": -0.2343551367521286, "logits/chosen": 1.5796682834625244, "logits/rejected": 1.5757529735565186, "logps/chosen": -1.6923612356185913, "logps/rejected": -4.133294105529785, "loss": 0.4502, "nll_loss": 0.4267158806324005, "rewards/accuracies": 1.0, "rewards/chosen": -0.16923612356185913, "rewards/margins": 0.24409329891204834, "rewards/rejected": -0.41332942247390747, "step": 7258 }, { "epoch": 19.874058863791923, "grad_norm": 3.7747418880462646, "learning_rate": 5.616438356164383e-09, "log_odds_chosen": 5.845987319946289, "log_odds_ratio": -0.09310413897037506, "logits/chosen": 1.0666556358337402, "logits/rejected": 1.062875509262085, "logps/chosen": -2.0344150066375732, "logps/rejected": -7.653323650360107, "loss": 0.6431, "nll_loss": 0.6337451338768005, "rewards/accuracies": 1.0, "rewards/chosen": -0.20344150066375732, "rewards/margins": 0.5618907809257507, "rewards/rejected": -0.7653322815895081, "step": 7259 }, { "epoch": 19.876796714579054, "grad_norm": 3.8992109298706055, "learning_rate": 5.47945205479452e-09, "log_odds_chosen": 2.0545601844787598, "log_odds_ratio": -0.15396513044834137, "logits/chosen": 1.1759976148605347, "logits/rejected": 1.144234299659729, "logps/chosen": -1.9814999103546143, "logps/rejected": -3.8695437908172607, "loss": 0.4463, "nll_loss": 0.43089622259140015, "rewards/accuracies": 1.0, "rewards/chosen": -0.19814999401569366, "rewards/margins": 0.18880441784858704, "rewards/rejected": -0.3869544267654419, "step": 7260 }, { "epoch": 19.87953456536619, "grad_norm": 3.9727065563201904, "learning_rate": 5.342465753424658e-09, "log_odds_chosen": 3.074401617050171, "log_odds_ratio": -0.18452753126621246, "logits/chosen": 1.4096962213516235, "logits/rejected": 1.4059810638427734, "logps/chosen": -2.055368423461914, "logps/rejected": -4.945235729217529, "loss": 0.4749, "nll_loss": 0.4564252495765686, "rewards/accuracies": 1.0, "rewards/chosen": -0.2055368423461914, "rewards/margins": 0.2889866828918457, "rewards/rejected": -0.4945235550403595, "step": 7261 }, { "epoch": 19.88227241615332, "grad_norm": 3.500372886657715, "learning_rate": 5.205479452054794e-09, "log_odds_chosen": 2.3871426582336426, "log_odds_ratio": -0.22183579206466675, "logits/chosen": 1.441124439239502, "logits/rejected": 1.416669487953186, "logps/chosen": -1.3880921602249146, "logps/rejected": -3.5544495582580566, "loss": 0.4263, "nll_loss": 0.40407076478004456, "rewards/accuracies": 1.0, "rewards/chosen": -0.1388092339038849, "rewards/margins": 0.21663576364517212, "rewards/rejected": -0.3554449677467346, "step": 7262 }, { "epoch": 19.88501026694045, "grad_norm": 4.481349468231201, "learning_rate": 5.0684931506849315e-09, "log_odds_chosen": 3.2657930850982666, "log_odds_ratio": -0.2762860059738159, "logits/chosen": 1.2775880098342896, "logits/rejected": 1.1623376607894897, "logps/chosen": -1.2687486410140991, "logps/rejected": -4.244085311889648, "loss": 0.5227, "nll_loss": 0.49511587619781494, "rewards/accuracies": 0.875, "rewards/chosen": -0.1268748641014099, "rewards/margins": 0.29753366112709045, "rewards/rejected": -0.42440852522850037, "step": 7263 }, { "epoch": 19.887748117727583, "grad_norm": 3.487687349319458, "learning_rate": 4.931506849315068e-09, "log_odds_chosen": 5.5954999923706055, "log_odds_ratio": -0.036737408488988876, "logits/chosen": 1.1208839416503906, "logits/rejected": 1.1574122905731201, "logps/chosen": -1.7054648399353027, "logps/rejected": -7.051706314086914, "loss": 0.5921, "nll_loss": 0.5883821249008179, "rewards/accuracies": 1.0, "rewards/chosen": -0.1705465018749237, "rewards/margins": 0.5346241593360901, "rewards/rejected": -0.7051706314086914, "step": 7264 }, { "epoch": 19.890485968514717, "grad_norm": 4.001723766326904, "learning_rate": 4.794520547945206e-09, "log_odds_chosen": 4.0223188400268555, "log_odds_ratio": -0.2699452042579651, "logits/chosen": 0.7804021835327148, "logits/rejected": 0.880220890045166, "logps/chosen": -2.203768730163574, "logps/rejected": -6.125698566436768, "loss": 0.7563, "nll_loss": 0.7293146848678589, "rewards/accuracies": 0.875, "rewards/chosen": -0.2203768789768219, "rewards/margins": 0.3921929597854614, "rewards/rejected": -0.6125699281692505, "step": 7265 }, { "epoch": 19.89322381930185, "grad_norm": 3.4244580268859863, "learning_rate": 4.657534246575342e-09, "log_odds_chosen": 3.226156234741211, "log_odds_ratio": -0.17019255459308624, "logits/chosen": 1.034221887588501, "logits/rejected": 1.002197027206421, "logps/chosen": -1.4825035333633423, "logps/rejected": -4.452820777893066, "loss": 0.4693, "nll_loss": 0.45227915048599243, "rewards/accuracies": 1.0, "rewards/chosen": -0.14825035631656647, "rewards/margins": 0.2970317006111145, "rewards/rejected": -0.4452820420265198, "step": 7266 }, { "epoch": 19.89596167008898, "grad_norm": 4.529177188873291, "learning_rate": 4.5205479452054794e-09, "log_odds_chosen": 1.3306267261505127, "log_odds_ratio": -0.35386982560157776, "logits/chosen": 1.0780341625213623, "logits/rejected": 1.0039052963256836, "logps/chosen": -1.3626495599746704, "logps/rejected": -2.5401899814605713, "loss": 0.4947, "nll_loss": 0.45933330059051514, "rewards/accuracies": 1.0, "rewards/chosen": -0.13626495003700256, "rewards/margins": 0.11775404214859009, "rewards/rejected": -0.25401899218559265, "step": 7267 }, { "epoch": 19.89869952087611, "grad_norm": 6.841058731079102, "learning_rate": 4.383561643835616e-09, "log_odds_chosen": 4.182857036590576, "log_odds_ratio": -0.1717938482761383, "logits/chosen": 0.8203937411308289, "logits/rejected": 0.7383174896240234, "logps/chosen": -1.6802372932434082, "logps/rejected": -5.66132926940918, "loss": 0.5786, "nll_loss": 0.5614649057388306, "rewards/accuracies": 1.0, "rewards/chosen": -0.1680237352848053, "rewards/margins": 0.39810919761657715, "rewards/rejected": -0.5661329030990601, "step": 7268 }, { "epoch": 19.901437371663246, "grad_norm": 4.049590587615967, "learning_rate": 4.246575342465753e-09, "log_odds_chosen": 3.2780942916870117, "log_odds_ratio": -0.1136537492275238, "logits/chosen": 0.9754847884178162, "logits/rejected": 0.9695309400558472, "logps/chosen": -1.2220113277435303, "logps/rejected": -4.045570373535156, "loss": 0.4337, "nll_loss": 0.4223819077014923, "rewards/accuracies": 1.0, "rewards/chosen": -0.12220113724470139, "rewards/margins": 0.2823558747768402, "rewards/rejected": -0.4045569896697998, "step": 7269 }, { "epoch": 19.904175222450377, "grad_norm": 6.907508373260498, "learning_rate": 4.10958904109589e-09, "log_odds_chosen": 2.5583126544952393, "log_odds_ratio": -0.5857375860214233, "logits/chosen": 1.2750147581100464, "logits/rejected": 1.3248422145843506, "logps/chosen": -2.485304832458496, "logps/rejected": -4.944270610809326, "loss": 0.6132, "nll_loss": 0.5546181201934814, "rewards/accuracies": 0.75, "rewards/chosen": -0.24853047728538513, "rewards/margins": 0.245896577835083, "rewards/rejected": -0.49442702531814575, "step": 7270 }, { "epoch": 19.906913073237508, "grad_norm": 3.60807204246521, "learning_rate": 3.972602739726027e-09, "log_odds_chosen": 4.224221229553223, "log_odds_ratio": -0.22376379370689392, "logits/chosen": 1.023179531097412, "logits/rejected": 1.066300392150879, "logps/chosen": -2.0056326389312744, "logps/rejected": -6.056270599365234, "loss": 0.6593, "nll_loss": 0.6368979215621948, "rewards/accuracies": 0.875, "rewards/chosen": -0.20056326687335968, "rewards/margins": 0.40506380796432495, "rewards/rejected": -0.6056270599365234, "step": 7271 }, { "epoch": 19.90965092402464, "grad_norm": 3.8302724361419678, "learning_rate": 3.8356164383561645e-09, "log_odds_chosen": 2.648399829864502, "log_odds_ratio": -0.1959695816040039, "logits/chosen": 1.1149331331253052, "logits/rejected": 1.1779942512512207, "logps/chosen": -1.9392480850219727, "logps/rejected": -4.391944885253906, "loss": 0.4996, "nll_loss": 0.47997617721557617, "rewards/accuracies": 0.875, "rewards/chosen": -0.19392481446266174, "rewards/margins": 0.24526968598365784, "rewards/rejected": -0.43919453024864197, "step": 7272 }, { "epoch": 19.912388774811774, "grad_norm": 7.477916240692139, "learning_rate": 3.698630136986301e-09, "log_odds_chosen": 2.723493814468384, "log_odds_ratio": -0.7242211699485779, "logits/chosen": 1.0913195610046387, "logits/rejected": 1.1387748718261719, "logps/chosen": -3.2768125534057617, "logps/rejected": -5.900892734527588, "loss": 0.7149, "nll_loss": 0.6424900889396667, "rewards/accuracies": 0.875, "rewards/chosen": -0.3276812732219696, "rewards/margins": 0.262408047914505, "rewards/rejected": -0.5900893211364746, "step": 7273 }, { "epoch": 19.915126625598905, "grad_norm": 6.903688430786133, "learning_rate": 3.561643835616438e-09, "log_odds_chosen": 3.7220232486724854, "log_odds_ratio": -0.3116621971130371, "logits/chosen": 1.1763479709625244, "logits/rejected": 1.1503019332885742, "logps/chosen": -2.225959300994873, "logps/rejected": -5.770544528961182, "loss": 0.7025, "nll_loss": 0.6713186502456665, "rewards/accuracies": 0.875, "rewards/chosen": -0.2225959300994873, "rewards/margins": 0.3544585704803467, "rewards/rejected": -0.577054500579834, "step": 7274 }, { "epoch": 19.917864476386036, "grad_norm": 7.496622562408447, "learning_rate": 3.424657534246575e-09, "log_odds_chosen": 1.7079222202301025, "log_odds_ratio": -0.4209060072898865, "logits/chosen": 1.0837085247039795, "logits/rejected": 0.9857836961746216, "logps/chosen": -2.198695182800293, "logps/rejected": -3.708721160888672, "loss": 0.6147, "nll_loss": 0.5726186037063599, "rewards/accuracies": 0.875, "rewards/chosen": -0.21986952424049377, "rewards/margins": 0.15100258588790894, "rewards/rejected": -0.3708721101284027, "step": 7275 }, { "epoch": 19.920602327173167, "grad_norm": 3.7405800819396973, "learning_rate": 3.2876712328767125e-09, "log_odds_chosen": 2.3597779273986816, "log_odds_ratio": -0.22342324256896973, "logits/chosen": 0.9792014360427856, "logits/rejected": 0.9773754477500916, "logps/chosen": -1.8248748779296875, "logps/rejected": -4.015997409820557, "loss": 0.4737, "nll_loss": 0.45136791467666626, "rewards/accuracies": 1.0, "rewards/chosen": -0.18248748779296875, "rewards/margins": 0.21911227703094482, "rewards/rejected": -0.4015997648239136, "step": 7276 }, { "epoch": 19.923340177960302, "grad_norm": 3.8706233501434326, "learning_rate": 3.1506849315068492e-09, "log_odds_chosen": 3.0129036903381348, "log_odds_ratio": -0.17311139404773712, "logits/chosen": 1.0442638397216797, "logits/rejected": 0.9597740173339844, "logps/chosen": -1.9417147636413574, "logps/rejected": -4.797313690185547, "loss": 0.5112, "nll_loss": 0.4938870370388031, "rewards/accuracies": 1.0, "rewards/chosen": -0.1941714882850647, "rewards/margins": 0.28555992245674133, "rewards/rejected": -0.47973138093948364, "step": 7277 }, { "epoch": 19.926078028747433, "grad_norm": 5.890668869018555, "learning_rate": 3.0136986301369864e-09, "log_odds_chosen": 1.883626937866211, "log_odds_ratio": -0.25337278842926025, "logits/chosen": 1.0021989345550537, "logits/rejected": 0.9982305765151978, "logps/chosen": -1.6907637119293213, "logps/rejected": -3.3945608139038086, "loss": 0.5146, "nll_loss": 0.48924529552459717, "rewards/accuracies": 1.0, "rewards/chosen": -0.16907638311386108, "rewards/margins": 0.17037971317768097, "rewards/rejected": -0.33945608139038086, "step": 7278 }, { "epoch": 19.928815879534564, "grad_norm": 3.381070852279663, "learning_rate": 2.876712328767123e-09, "log_odds_chosen": 3.4253313541412354, "log_odds_ratio": -0.19322684407234192, "logits/chosen": 1.0544140338897705, "logits/rejected": 1.0169905424118042, "logps/chosen": -1.4884666204452515, "logps/rejected": -4.627376556396484, "loss": 0.4071, "nll_loss": 0.38777250051498413, "rewards/accuracies": 1.0, "rewards/chosen": -0.14884667098522186, "rewards/margins": 0.3138909637928009, "rewards/rejected": -0.4627376198768616, "step": 7279 }, { "epoch": 19.931553730321696, "grad_norm": 4.875039100646973, "learning_rate": 2.73972602739726e-09, "log_odds_chosen": 2.7610912322998047, "log_odds_ratio": -0.3725714683532715, "logits/chosen": 0.9222643971443176, "logits/rejected": 0.9024466276168823, "logps/chosen": -2.3602914810180664, "logps/rejected": -4.983382701873779, "loss": 0.6731, "nll_loss": 0.6358456015586853, "rewards/accuracies": 0.75, "rewards/chosen": -0.23602914810180664, "rewards/margins": 0.26230910420417786, "rewards/rejected": -0.4983382225036621, "step": 7280 }, { "epoch": 19.93429158110883, "grad_norm": 3.974269390106201, "learning_rate": 2.602739726027397e-09, "log_odds_chosen": 3.20632266998291, "log_odds_ratio": -0.22169503569602966, "logits/chosen": 1.3129817247390747, "logits/rejected": 1.312517523765564, "logps/chosen": -1.7049002647399902, "logps/rejected": -4.730666637420654, "loss": 0.4962, "nll_loss": 0.4739985167980194, "rewards/accuracies": 0.875, "rewards/chosen": -0.17049002647399902, "rewards/margins": 0.30257663130760193, "rewards/rejected": -0.47306668758392334, "step": 7281 }, { "epoch": 19.93702943189596, "grad_norm": 4.093825817108154, "learning_rate": 2.465753424657534e-09, "log_odds_chosen": 2.5739405155181885, "log_odds_ratio": -0.25327733159065247, "logits/chosen": 1.0556614398956299, "logits/rejected": 1.0235130786895752, "logps/chosen": -1.6565204858779907, "logps/rejected": -4.041488170623779, "loss": 0.5363, "nll_loss": 0.5109654068946838, "rewards/accuracies": 0.875, "rewards/chosen": -0.16565203666687012, "rewards/margins": 0.23849675059318542, "rewards/rejected": -0.40414881706237793, "step": 7282 }, { "epoch": 19.939767282683093, "grad_norm": 3.6298556327819824, "learning_rate": 2.328767123287671e-09, "log_odds_chosen": 1.9159399271011353, "log_odds_ratio": -0.29218846559524536, "logits/chosen": 1.1317543983459473, "logits/rejected": 1.0634410381317139, "logps/chosen": -1.618482232093811, "logps/rejected": -3.316941261291504, "loss": 0.4221, "nll_loss": 0.3928353488445282, "rewards/accuracies": 0.875, "rewards/chosen": -0.16184821724891663, "rewards/margins": 0.16984590888023376, "rewards/rejected": -0.3316941261291504, "step": 7283 }, { "epoch": 19.942505133470227, "grad_norm": 3.6231753826141357, "learning_rate": 2.191780821917808e-09, "log_odds_chosen": 3.785698413848877, "log_odds_ratio": -0.12809771299362183, "logits/chosen": 0.9800418615341187, "logits/rejected": 0.9829879999160767, "logps/chosen": -2.120854377746582, "logps/rejected": -5.694503307342529, "loss": 0.6117, "nll_loss": 0.5989121794700623, "rewards/accuracies": 1.0, "rewards/chosen": -0.21208545565605164, "rewards/margins": 0.3573648929595947, "rewards/rejected": -0.5694503784179688, "step": 7284 }, { "epoch": 19.94524298425736, "grad_norm": 4.204490661621094, "learning_rate": 2.054794520547945e-09, "log_odds_chosen": 2.1343092918395996, "log_odds_ratio": -0.22240573167800903, "logits/chosen": 1.2709872722625732, "logits/rejected": 1.2714675664901733, "logps/chosen": -1.6657273769378662, "logps/rejected": -3.6296021938323975, "loss": 0.509, "nll_loss": 0.48672038316726685, "rewards/accuracies": 1.0, "rewards/chosen": -0.16657274961471558, "rewards/margins": 0.19638746976852417, "rewards/rejected": -0.36296021938323975, "step": 7285 }, { "epoch": 19.94798083504449, "grad_norm": 3.444520950317383, "learning_rate": 1.9178082191780823e-09, "log_odds_chosen": 3.6254611015319824, "log_odds_ratio": -0.09273174405097961, "logits/chosen": 1.4439573287963867, "logits/rejected": 1.3826161623001099, "logps/chosen": -1.252223253250122, "logps/rejected": -4.55116081237793, "loss": 0.3933, "nll_loss": 0.38402819633483887, "rewards/accuracies": 1.0, "rewards/chosen": -0.1252223253250122, "rewards/margins": 0.32989373803138733, "rewards/rejected": -0.4551160931587219, "step": 7286 }, { "epoch": 19.95071868583162, "grad_norm": 3.2690529823303223, "learning_rate": 1.780821917808219e-09, "log_odds_chosen": 3.122532367706299, "log_odds_ratio": -0.1342100352048874, "logits/chosen": 1.146388053894043, "logits/rejected": 1.179097294807434, "logps/chosen": -2.0370941162109375, "logps/rejected": -4.978950500488281, "loss": 0.5248, "nll_loss": 0.5113815665245056, "rewards/accuracies": 1.0, "rewards/chosen": -0.2037094086408615, "rewards/margins": 0.294185608625412, "rewards/rejected": -0.4978950023651123, "step": 7287 }, { "epoch": 19.953456536618756, "grad_norm": 10.329150199890137, "learning_rate": 1.6438356164383562e-09, "log_odds_chosen": 3.4488415718078613, "log_odds_ratio": -0.4370875358581543, "logits/chosen": 1.2924079895019531, "logits/rejected": 1.323082685470581, "logps/chosen": -2.3135664463043213, "logps/rejected": -5.594289779663086, "loss": 0.615, "nll_loss": 0.5712620615959167, "rewards/accuracies": 0.875, "rewards/chosen": -0.2313566505908966, "rewards/margins": 0.32807236909866333, "rewards/rejected": -0.5594289898872375, "step": 7288 }, { "epoch": 19.956194387405887, "grad_norm": 4.040230751037598, "learning_rate": 1.5068493150684932e-09, "log_odds_chosen": 3.263770341873169, "log_odds_ratio": -0.28380030393600464, "logits/chosen": 1.1225128173828125, "logits/rejected": 1.1403676271438599, "logps/chosen": -1.9362181425094604, "logps/rejected": -5.074052333831787, "loss": 0.568, "nll_loss": 0.539600670337677, "rewards/accuracies": 0.875, "rewards/chosen": -0.19362181425094604, "rewards/margins": 0.3137834370136261, "rewards/rejected": -0.5074052810668945, "step": 7289 }, { "epoch": 19.958932238193018, "grad_norm": 3.559087038040161, "learning_rate": 1.36986301369863e-09, "log_odds_chosen": 1.704710841178894, "log_odds_ratio": -0.2860815227031708, "logits/chosen": 1.0833972692489624, "logits/rejected": 1.0295758247375488, "logps/chosen": -1.249192714691162, "logps/rejected": -2.70798921585083, "loss": 0.4338, "nll_loss": 0.40518277883529663, "rewards/accuracies": 0.875, "rewards/chosen": -0.12491928040981293, "rewards/margins": 0.14587964117527008, "rewards/rejected": -0.270798921585083, "step": 7290 }, { "epoch": 19.96167008898015, "grad_norm": 8.407442092895508, "learning_rate": 1.232876712328767e-09, "log_odds_chosen": 1.0838665962219238, "log_odds_ratio": -0.6842073202133179, "logits/chosen": 0.905350387096405, "logits/rejected": 1.007320523262024, "logps/chosen": -3.399104356765747, "logps/rejected": -4.451962947845459, "loss": 0.8129, "nll_loss": 0.7444929480552673, "rewards/accuracies": 0.75, "rewards/chosen": -0.33991044759750366, "rewards/margins": 0.10528583824634552, "rewards/rejected": -0.4451963007450104, "step": 7291 }, { "epoch": 19.964407939767284, "grad_norm": 12.386310577392578, "learning_rate": 1.095890410958904e-09, "log_odds_chosen": 1.313535451889038, "log_odds_ratio": -0.4867998957633972, "logits/chosen": 1.3033976554870605, "logits/rejected": 1.2666510343551636, "logps/chosen": -1.751632809638977, "logps/rejected": -2.929952383041382, "loss": 0.562, "nll_loss": 0.5133636593818665, "rewards/accuracies": 0.625, "rewards/chosen": -0.17516328394412994, "rewards/margins": 0.11783197522163391, "rewards/rejected": -0.29299524426460266, "step": 7292 }, { "epoch": 19.967145790554415, "grad_norm": 3.463604211807251, "learning_rate": 9.589041095890411e-10, "log_odds_chosen": 2.5950586795806885, "log_odds_ratio": -0.15506866574287415, "logits/chosen": 1.3470447063446045, "logits/rejected": 1.2932178974151611, "logps/chosen": -1.3659238815307617, "logps/rejected": -3.616050958633423, "loss": 0.4798, "nll_loss": 0.46432727575302124, "rewards/accuracies": 1.0, "rewards/chosen": -0.13659238815307617, "rewards/margins": 0.22501270473003387, "rewards/rejected": -0.36160510778427124, "step": 7293 }, { "epoch": 19.969883641341546, "grad_norm": 3.337127208709717, "learning_rate": 8.219178082191781e-10, "log_odds_chosen": 3.0777034759521484, "log_odds_ratio": -0.18575924634933472, "logits/chosen": 0.9936162233352661, "logits/rejected": 0.9033308029174805, "logps/chosen": -2.216280937194824, "logps/rejected": -4.988611221313477, "loss": 0.4931, "nll_loss": 0.4745495319366455, "rewards/accuracies": 0.875, "rewards/chosen": -0.2216280847787857, "rewards/margins": 0.2772330641746521, "rewards/rejected": -0.4988611936569214, "step": 7294 }, { "epoch": 19.972621492128678, "grad_norm": 4.649560451507568, "learning_rate": 6.84931506849315e-10, "log_odds_chosen": 2.8250083923339844, "log_odds_ratio": -0.18176570534706116, "logits/chosen": 0.7709149122238159, "logits/rejected": 0.7055429220199585, "logps/chosen": -1.559364676475525, "logps/rejected": -4.160139083862305, "loss": 0.5, "nll_loss": 0.48180118203163147, "rewards/accuracies": 1.0, "rewards/chosen": -0.15593647956848145, "rewards/margins": 0.26007747650146484, "rewards/rejected": -0.4160139560699463, "step": 7295 }, { "epoch": 19.975359342915812, "grad_norm": 3.3092806339263916, "learning_rate": 5.47945205479452e-10, "log_odds_chosen": 3.925952911376953, "log_odds_ratio": -0.16791924834251404, "logits/chosen": 0.9893336892127991, "logits/rejected": 0.9576940536499023, "logps/chosen": -1.2806707620620728, "logps/rejected": -4.903594970703125, "loss": 0.4603, "nll_loss": 0.4434967041015625, "rewards/accuracies": 1.0, "rewards/chosen": -0.12806707620620728, "rewards/margins": 0.36229246854782104, "rewards/rejected": -0.4903595447540283, "step": 7296 }, { "epoch": 19.978097193702943, "grad_norm": 3.7351765632629395, "learning_rate": 4.1095890410958906e-10, "log_odds_chosen": 3.976651191711426, "log_odds_ratio": -0.07711019366979599, "logits/chosen": 1.1224560737609863, "logits/rejected": 1.155797004699707, "logps/chosen": -1.773249864578247, "logps/rejected": -5.537476539611816, "loss": 0.4933, "nll_loss": 0.48560693860054016, "rewards/accuracies": 1.0, "rewards/chosen": -0.17732499539852142, "rewards/margins": 0.37642261385917664, "rewards/rejected": -0.5537476539611816, "step": 7297 }, { "epoch": 19.980835044490075, "grad_norm": 3.6104657649993896, "learning_rate": 2.73972602739726e-10, "log_odds_chosen": 3.628371000289917, "log_odds_ratio": -0.11832473427057266, "logits/chosen": 1.3188738822937012, "logits/rejected": 1.305725336074829, "logps/chosen": -1.7241644859313965, "logps/rejected": -5.071164608001709, "loss": 0.4749, "nll_loss": 0.46302640438079834, "rewards/accuracies": 1.0, "rewards/chosen": -0.17241646349430084, "rewards/margins": 0.3347000181674957, "rewards/rejected": -0.507116436958313, "step": 7298 }, { "epoch": 19.983572895277206, "grad_norm": 4.924307346343994, "learning_rate": 1.36986301369863e-10, "log_odds_chosen": 1.2919399738311768, "log_odds_ratio": -0.36868926882743835, "logits/chosen": 1.1187900304794312, "logits/rejected": 1.1471688747406006, "logps/chosen": -2.221674919128418, "logps/rejected": -3.3998074531555176, "loss": 0.5501, "nll_loss": 0.5132110714912415, "rewards/accuracies": 0.875, "rewards/chosen": -0.2221674919128418, "rewards/margins": 0.11781325936317444, "rewards/rejected": -0.33998075127601624, "step": 7299 }, { "epoch": 19.98631074606434, "grad_norm": 9.296124458312988, "learning_rate": 0.0, "log_odds_chosen": 1.0673062801361084, "log_odds_ratio": -0.554894745349884, "logits/chosen": 1.2469114065170288, "logits/rejected": 1.1566678285598755, "logps/chosen": -2.693725109100342, "logps/rejected": -3.6574413776397705, "loss": 0.6539, "nll_loss": 0.5983918309211731, "rewards/accuracies": 0.625, "rewards/chosen": -0.26937252283096313, "rewards/margins": 0.09637163579463959, "rewards/rejected": -0.3657441735267639, "step": 7300 } ], "logging_steps": 1, "max_steps": 7300, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }