{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5, "eval_steps": 100, "global_step": 3821, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3054830287206268e-08, "logits/chosen": -2.369490146636963, "logits/rejected": -1.5597286224365234, "logps/chosen": -385.4990234375, "logps/rejected": -273.42059326171875, "loss": 0.0246, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3054830287206266e-07, "logits/chosen": -2.0491132736206055, "logits/rejected": -1.8426718711853027, "logps/chosen": -310.26873779296875, "logps/rejected": -267.3825378417969, "loss": 0.0441, "rewards/accuracies": 0.3888888955116272, "rewards/chosen": -0.0011040156241506338, "rewards/margins": -0.0008565255557186902, "rewards/rejected": -0.00024749012663960457, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.610966057441253e-07, "logits/chosen": -1.9567826986312866, "logits/rejected": -1.711861252784729, "logps/chosen": -219.58609008789062, "logps/rejected": -234.59213256835938, "loss": 0.0574, "rewards/accuracies": 0.5, "rewards/chosen": -0.0003204078529961407, "rewards/margins": 0.001007176237180829, "rewards/rejected": -0.0013275842647999525, "step": 20 }, { "epoch": 0.0, "learning_rate": 3.9164490861618804e-07, "logits/chosen": -2.031904697418213, "logits/rejected": -1.9914891719818115, "logps/chosen": -249.6280059814453, "logps/rejected": -252.6583709716797, "loss": 0.0438, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 5.8166508097201586e-05, "rewards/margins": 0.005917771253734827, "rewards/rejected": -0.005859604105353355, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.221932114882506e-07, "logits/chosen": -1.8940136432647705, "logits/rejected": -1.8449805974960327, "logps/chosen": -227.1477508544922, "logps/rejected": -220.53396606445312, "loss": 0.0456, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.009471016004681587, "rewards/margins": 0.021536212414503098, "rewards/rejected": -0.031007226556539536, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.527415143603135e-07, "logits/chosen": -1.9943370819091797, "logits/rejected": -1.8909276723861694, "logps/chosen": -315.15350341796875, "logps/rejected": -302.82415771484375, "loss": 0.0401, "rewards/accuracies": 0.625, "rewards/chosen": -0.037343986332416534, "rewards/margins": 0.052781712263822556, "rewards/rejected": -0.09012570232152939, "step": 50 }, { "epoch": 0.01, "learning_rate": 7.832898172323761e-07, "logits/chosen": -2.053264856338501, "logits/rejected": -1.9205459356307983, "logps/chosen": -343.319580078125, "logps/rejected": -314.1142272949219, "loss": 0.0505, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1338554471731186, "rewards/margins": 0.07730092853307724, "rewards/rejected": -0.21115641295909882, "step": 60 }, { "epoch": 0.01, "learning_rate": 9.138381201044387e-07, "logits/chosen": -1.8854602575302124, "logits/rejected": -1.791210412979126, "logps/chosen": -293.802734375, "logps/rejected": -311.8372497558594, "loss": 0.0205, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07498244941234589, "rewards/margins": 0.09804259240627289, "rewards/rejected": -0.17302504181861877, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.0443864229765013e-06, "logits/chosen": -2.045764446258545, "logits/rejected": -1.849530577659607, "logps/chosen": -283.08514404296875, "logps/rejected": -276.24542236328125, "loss": 0.0166, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.024600975215435028, "rewards/margins": 0.08562992513179779, "rewards/rejected": -0.11023088544607162, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.1749347258485642e-06, "logits/chosen": -1.9340765476226807, "logits/rejected": -1.7772783041000366, "logps/chosen": -248.1184539794922, "logps/rejected": -240.0049285888672, "loss": 0.0341, "rewards/accuracies": 0.625, "rewards/chosen": -0.06872863322496414, "rewards/margins": 0.1059706062078476, "rewards/rejected": -0.17469926178455353, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.305483028720627e-06, "logits/chosen": -1.8918840885162354, "logits/rejected": -1.7343670129776, "logps/chosen": -247.6029815673828, "logps/rejected": -272.4024658203125, "loss": 0.0266, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08719220012426376, "rewards/margins": 0.08935483545064926, "rewards/rejected": -0.17654703557491302, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -1.9105064868927002, "eval_logits/rejected": -1.7278672456741333, "eval_logps/chosen": -271.5140075683594, "eval_logps/rejected": -255.43084716796875, "eval_loss": 0.032837070524692535, "eval_rewards/accuracies": 0.6240000128746033, "eval_rewards/chosen": -0.08067915588617325, "eval_rewards/margins": 0.09029880166053772, "eval_rewards/rejected": -0.17097796499729156, "eval_runtime": 1438.7473, "eval_samples_per_second": 1.39, "eval_steps_per_second": 0.348, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.4360313315926894e-06, "logits/chosen": -1.835116982460022, "logits/rejected": -1.9719572067260742, "logps/chosen": -270.7997741699219, "logps/rejected": -303.7984313964844, "loss": 0.0406, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.006692466791719198, "rewards/margins": 0.0049112169072031975, "rewards/rejected": -0.011603685095906258, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.5665796344647521e-06, "logits/chosen": -1.7181291580200195, "logits/rejected": -1.7216064929962158, "logps/chosen": -257.0571594238281, "logps/rejected": -271.36822509765625, "loss": 0.0453, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.009867778047919273, "rewards/margins": 0.009023504331707954, "rewards/rejected": -0.018891282379627228, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.6971279373368146e-06, "logits/chosen": -1.8182661533355713, "logits/rejected": -1.7234899997711182, "logps/chosen": -272.1319274902344, "logps/rejected": -258.28466796875, "loss": 0.0516, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.008071488700807095, "rewards/margins": 0.019269878044724464, "rewards/rejected": -0.027341369539499283, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.8276762402088774e-06, "logits/chosen": -1.816423773765564, "logits/rejected": -1.683519959449768, "logps/chosen": -230.3521270751953, "logps/rejected": -238.66659545898438, "loss": 0.0511, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0176777932792902, "rewards/margins": 0.017024535685777664, "rewards/rejected": -0.034702327102422714, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.9582245430809403e-06, "logits/chosen": -1.678515076637268, "logits/rejected": -1.6133581399917603, "logps/chosen": -253.1982421875, "logps/rejected": -319.99322509765625, "loss": 0.0428, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.024333838373422623, "rewards/margins": 0.02610177733004093, "rewards/rejected": -0.050435613840818405, "step": 150 }, { "epoch": 0.02, "learning_rate": 2.0887728459530026e-06, "logits/chosen": -1.6322215795516968, "logits/rejected": -1.658489465713501, "logps/chosen": -269.10064697265625, "logps/rejected": -355.62982177734375, "loss": 0.0362, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03421890363097191, "rewards/margins": 0.04979991167783737, "rewards/rejected": -0.08401881158351898, "step": 160 }, { "epoch": 0.02, "learning_rate": 2.2193211488250653e-06, "logits/chosen": -1.9706852436065674, "logits/rejected": -1.6260664463043213, "logps/chosen": -344.87713623046875, "logps/rejected": -353.2381286621094, "loss": 0.0304, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.061358414590358734, "rewards/margins": 0.04989536851644516, "rewards/rejected": -0.1112537831068039, "step": 170 }, { "epoch": 0.02, "learning_rate": 2.3498694516971284e-06, "logits/chosen": -1.8163375854492188, "logits/rejected": -1.5700896978378296, "logps/chosen": -258.64105224609375, "logps/rejected": -338.1070251464844, "loss": 0.0258, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0774625912308693, "rewards/margins": 0.06613731384277344, "rewards/rejected": -0.14359989762306213, "step": 180 }, { "epoch": 0.02, "learning_rate": 2.4804177545691907e-06, "logits/chosen": -1.852280855178833, "logits/rejected": -1.7033179998397827, "logps/chosen": -338.18060302734375, "logps/rejected": -374.50762939453125, "loss": 0.0171, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08768017590045929, "rewards/margins": 0.0716872438788414, "rewards/rejected": -0.1593673974275589, "step": 190 }, { "epoch": 0.03, "learning_rate": 2.610966057441254e-06, "logits/chosen": -1.820905089378357, "logits/rejected": -1.5377622842788696, "logps/chosen": -370.4166564941406, "logps/rejected": -401.53326416015625, "loss": 0.0314, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12992268800735474, "rewards/margins": 0.06325654685497284, "rewards/rejected": -0.19317921996116638, "step": 200 }, { "epoch": 0.03, "learning_rate": 2.741514360313316e-06, "logits/chosen": -1.7099710702896118, "logits/rejected": -1.646240234375, "logps/chosen": -389.4395751953125, "logps/rejected": -527.8355712890625, "loss": 0.0193, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13263192772865295, "rewards/margins": 0.09985637664794922, "rewards/rejected": -0.23248830437660217, "step": 210 }, { "epoch": 0.03, "learning_rate": 2.872062663185379e-06, "logits/chosen": -1.8093169927597046, "logits/rejected": -1.6953338384628296, "logps/chosen": -325.22711181640625, "logps/rejected": -392.5846252441406, "loss": 0.0128, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1114763468503952, "rewards/margins": 0.05936206504702568, "rewards/rejected": -0.17083843052387238, "step": 220 }, { "epoch": 0.03, "learning_rate": 3.0026109660574416e-06, "logits/chosen": -1.7330865859985352, "logits/rejected": -1.6318591833114624, "logps/chosen": -341.5679931640625, "logps/rejected": -428.912841796875, "loss": 0.0216, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1032460555434227, "rewards/margins": 0.08458513021469116, "rewards/rejected": -0.18783119320869446, "step": 230 }, { "epoch": 0.03, "learning_rate": 3.1331592689295043e-06, "logits/chosen": -1.7688487768173218, "logits/rejected": -1.5405038595199585, "logps/chosen": -364.29620361328125, "logps/rejected": -505.416015625, "loss": 0.0133, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10832507908344269, "rewards/margins": 0.09639595448970795, "rewards/rejected": -0.20472104847431183, "step": 240 }, { "epoch": 0.03, "learning_rate": 3.263707571801567e-06, "logits/chosen": -1.5668131113052368, "logits/rejected": -1.519717812538147, "logps/chosen": -332.30218505859375, "logps/rejected": -402.8693542480469, "loss": 0.038, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11549460887908936, "rewards/margins": 0.06707409769296646, "rewards/rejected": -0.1825687289237976, "step": 250 }, { "epoch": 0.03, "learning_rate": 3.3942558746736293e-06, "logits/chosen": -1.6691093444824219, "logits/rejected": -1.416657567024231, "logps/chosen": -449.72540283203125, "logps/rejected": -473.7371520996094, "loss": 0.0163, "rewards/accuracies": 0.75, "rewards/chosen": -0.13770505785942078, "rewards/margins": 0.07610594481229782, "rewards/rejected": -0.213810995221138, "step": 260 }, { "epoch": 0.04, "learning_rate": 3.524804177545692e-06, "logits/chosen": -1.7901989221572876, "logits/rejected": -1.7466062307357788, "logps/chosen": -437.5882263183594, "logps/rejected": -469.40545654296875, "loss": 0.0156, "rewards/accuracies": 0.625, "rewards/chosen": -0.1391008198261261, "rewards/margins": 0.038480814546346664, "rewards/rejected": -0.17758163809776306, "step": 270 }, { "epoch": 0.04, "learning_rate": 3.6553524804177547e-06, "logits/chosen": -1.6745504140853882, "logits/rejected": -1.5152240991592407, "logps/chosen": -366.30206298828125, "logps/rejected": -355.90997314453125, "loss": 0.0212, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08930500596761703, "rewards/margins": 0.051776617765426636, "rewards/rejected": -0.14108161628246307, "step": 280 }, { "epoch": 0.04, "learning_rate": 3.7859007832898174e-06, "logits/chosen": -1.851283311843872, "logits/rejected": -1.7219947576522827, "logps/chosen": -373.37127685546875, "logps/rejected": -498.83575439453125, "loss": 0.0236, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12356267124414444, "rewards/margins": 0.1061166301369667, "rewards/rejected": -0.22967930138111115, "step": 290 }, { "epoch": 0.04, "learning_rate": 3.9164490861618806e-06, "logits/chosen": -1.7400296926498413, "logits/rejected": -1.8181949853897095, "logps/chosen": -405.6025085449219, "logps/rejected": -493.41015625, "loss": 0.0288, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10420694202184677, "rewards/margins": 0.06644182652235031, "rewards/rejected": -0.17064878344535828, "step": 300 }, { "epoch": 0.04, "learning_rate": 4.046997389033943e-06, "logits/chosen": -1.6865379810333252, "logits/rejected": -1.433075189590454, "logps/chosen": -389.51068115234375, "logps/rejected": -396.79986572265625, "loss": 0.0227, "rewards/accuracies": 0.625, "rewards/chosen": -0.11703090369701385, "rewards/margins": 0.05711246654391289, "rewards/rejected": -0.17414335906505585, "step": 310 }, { "epoch": 0.04, "learning_rate": 4.177545691906005e-06, "logits/chosen": -1.6912682056427002, "logits/rejected": -1.5915567874908447, "logps/chosen": -336.6483459472656, "logps/rejected": -468.4459533691406, "loss": 0.0205, "rewards/accuracies": 0.625, "rewards/chosen": -0.1152346134185791, "rewards/margins": 0.11246738582849503, "rewards/rejected": -0.22770199179649353, "step": 320 }, { "epoch": 0.04, "learning_rate": 4.308093994778068e-06, "logits/chosen": -1.8653459548950195, "logits/rejected": -1.7888025045394897, "logps/chosen": -427.0625915527344, "logps/rejected": -462.09478759765625, "loss": 0.0184, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11530382931232452, "rewards/margins": 0.08452065289020538, "rewards/rejected": -0.1998244822025299, "step": 330 }, { "epoch": 0.04, "learning_rate": 4.4386422976501306e-06, "logits/chosen": -1.8150964975357056, "logits/rejected": -1.4187155961990356, "logps/chosen": -446.86700439453125, "logps/rejected": -458.47222900390625, "loss": 0.0177, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13595229387283325, "rewards/margins": 0.10047326236963272, "rewards/rejected": -0.23642556369304657, "step": 340 }, { "epoch": 0.05, "learning_rate": 4.569190600522193e-06, "logits/chosen": -1.722266435623169, "logits/rejected": -1.6590478420257568, "logps/chosen": -303.31488037109375, "logps/rejected": -452.7051696777344, "loss": 0.0207, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09452395886182785, "rewards/margins": 0.12657153606414795, "rewards/rejected": -0.2210954874753952, "step": 350 }, { "epoch": 0.05, "learning_rate": 4.699738903394257e-06, "logits/chosen": -1.7611217498779297, "logits/rejected": -1.5591890811920166, "logps/chosen": -429.9307556152344, "logps/rejected": -500.1407165527344, "loss": 0.0265, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14049680531024933, "rewards/margins": 0.09930995851755142, "rewards/rejected": -0.23980677127838135, "step": 360 }, { "epoch": 0.05, "learning_rate": 4.8302872062663196e-06, "logits/chosen": -1.7344259023666382, "logits/rejected": -1.9218409061431885, "logps/chosen": -207.9822235107422, "logps/rejected": -447.21099853515625, "loss": 0.0259, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.055183105170726776, "rewards/margins": 0.06659817695617676, "rewards/rejected": -0.12178128957748413, "step": 370 }, { "epoch": 0.05, "learning_rate": 4.9608355091383814e-06, "logits/chosen": -1.818023920059204, "logits/rejected": -1.805349349975586, "logps/chosen": -288.32958984375, "logps/rejected": -360.1125183105469, "loss": 0.0273, "rewards/accuracies": 0.625, "rewards/chosen": -0.08490701764822006, "rewards/margins": 0.0758265033364296, "rewards/rejected": -0.16073353588581085, "step": 380 }, { "epoch": 0.05, "learning_rate": 4.9999488562447675e-06, "logits/chosen": -1.7136071920394897, "logits/rejected": -1.4141287803649902, "logps/chosen": -387.30340576171875, "logps/rejected": -480.63006591796875, "loss": 0.0271, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10020840167999268, "rewards/margins": 0.11442838609218597, "rewards/rejected": -0.21463680267333984, "step": 390 }, { "epoch": 0.05, "learning_rate": 4.999698361256577e-06, "logits/chosen": -1.7331829071044922, "logits/rejected": -1.81618332862854, "logps/chosen": -392.0972900390625, "logps/rejected": -509.33026123046875, "loss": 0.0122, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1033233031630516, "rewards/margins": 0.10533346980810165, "rewards/rejected": -0.20865675806999207, "step": 400 }, { "epoch": 0.05, "learning_rate": 4.999239142174581e-06, "logits/chosen": -1.8738248348236084, "logits/rejected": -1.6443102359771729, "logps/chosen": -390.17816162109375, "logps/rejected": -471.81719970703125, "loss": 0.0141, "rewards/accuracies": 0.625, "rewards/chosen": -0.14162249863147736, "rewards/margins": 0.07198909670114517, "rewards/rejected": -0.21361157298088074, "step": 410 }, { "epoch": 0.05, "learning_rate": 4.99857123734344e-06, "logits/chosen": -1.806711196899414, "logits/rejected": -1.6605952978134155, "logps/chosen": -439.01068115234375, "logps/rejected": -494.49298095703125, "loss": 0.0269, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10551184415817261, "rewards/margins": 0.08282340317964554, "rewards/rejected": -0.18833525478839874, "step": 420 }, { "epoch": 0.06, "learning_rate": 4.997694702533016e-06, "logits/chosen": -1.787919044494629, "logits/rejected": -1.6730549335479736, "logps/chosen": -387.7268981933594, "logps/rejected": -472.0628356933594, "loss": 0.0207, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10901729017496109, "rewards/margins": 0.08961702138185501, "rewards/rejected": -0.1986342966556549, "step": 430 }, { "epoch": 0.06, "learning_rate": 4.996609610933713e-06, "logits/chosen": -2.018451690673828, "logits/rejected": -1.7933435440063477, "logps/chosen": -429.22125244140625, "logps/rejected": -481.28521728515625, "loss": 0.0153, "rewards/accuracies": 0.75, "rewards/chosen": -0.10702776908874512, "rewards/margins": 0.08567257970571518, "rewards/rejected": -0.1927003413438797, "step": 440 }, { "epoch": 0.06, "learning_rate": 4.995316053150366e-06, "logits/chosen": -1.779102087020874, "logits/rejected": -1.6923627853393555, "logps/chosen": -341.5452575683594, "logps/rejected": -465.40814208984375, "loss": 0.0233, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12305702269077301, "rewards/margins": 0.10371887683868408, "rewards/rejected": -0.2267758846282959, "step": 450 }, { "epoch": 0.06, "learning_rate": 4.9938141371946815e-06, "logits/chosen": -1.8598573207855225, "logits/rejected": -1.348837971687317, "logps/chosen": -441.0829162597656, "logps/rejected": -464.0218811035156, "loss": 0.0202, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12903425097465515, "rewards/margins": 0.10815366357564926, "rewards/rejected": -0.23718790709972382, "step": 460 }, { "epoch": 0.06, "learning_rate": 4.992103988476206e-06, "logits/chosen": -1.7300249338150024, "logits/rejected": -1.596326231956482, "logps/chosen": -342.7567443847656, "logps/rejected": -406.29541015625, "loss": 0.0148, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09964610636234283, "rewards/margins": 0.07735751569271088, "rewards/rejected": -0.1770036369562149, "step": 470 }, { "epoch": 0.06, "learning_rate": 4.990185749791866e-06, "logits/chosen": -1.7447799444198608, "logits/rejected": -1.6994373798370361, "logps/chosen": -379.8456115722656, "logps/rejected": -505.572021484375, "loss": 0.0226, "rewards/accuracies": 0.75, "rewards/chosen": -0.08133331686258316, "rewards/margins": 0.09739028662443161, "rewards/rejected": -0.17872360348701477, "step": 480 }, { "epoch": 0.06, "learning_rate": 4.9880595813140395e-06, "logits/chosen": -1.9732681512832642, "logits/rejected": -1.6870553493499756, "logps/chosen": -305.2247619628906, "logps/rejected": -357.5276184082031, "loss": 0.0195, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05521916598081589, "rewards/margins": 0.08237910270690918, "rewards/rejected": -0.13759826123714447, "step": 490 }, { "epoch": 0.07, "learning_rate": 4.985725660577184e-06, "logits/chosen": -1.8921085596084595, "logits/rejected": -1.8584343194961548, "logps/chosen": -283.40106201171875, "logps/rejected": -382.2165832519531, "loss": 0.0164, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07300186902284622, "rewards/margins": 0.0925629585981369, "rewards/rejected": -0.16556482017040253, "step": 500 }, { "epoch": 0.07, "learning_rate": 4.983184182463009e-06, "logits/chosen": -1.9322255849838257, "logits/rejected": -1.6384913921356201, "logps/chosen": -490.31304931640625, "logps/rejected": -518.8265380859375, "loss": 0.0117, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12417304515838623, "rewards/margins": 0.11127636581659317, "rewards/rejected": -0.2354494333267212, "step": 510 }, { "epoch": 0.07, "learning_rate": 4.980435359184203e-06, "logits/chosen": -2.052896499633789, "logits/rejected": -1.837281584739685, "logps/chosen": -376.72503662109375, "logps/rejected": -417.20025634765625, "loss": 0.0139, "rewards/accuracies": 0.75, "rewards/chosen": -0.0975869745016098, "rewards/margins": 0.07774551957845688, "rewards/rejected": -0.17533248662948608, "step": 520 }, { "epoch": 0.07, "learning_rate": 4.9774794202667236e-06, "logits/chosen": -1.831719160079956, "logits/rejected": -1.6760985851287842, "logps/chosen": -392.82623291015625, "logps/rejected": -441.59906005859375, "loss": 0.0321, "rewards/accuracies": 0.625, "rewards/chosen": -0.09190316498279572, "rewards/margins": 0.07686121761798859, "rewards/rejected": -0.1687643975019455, "step": 530 }, { "epoch": 0.07, "learning_rate": 4.974316612530615e-06, "logits/chosen": -1.855755090713501, "logits/rejected": -1.8211097717285156, "logps/chosen": -391.76312255859375, "logps/rejected": -442.8075256347656, "loss": 0.0122, "rewards/accuracies": 0.75, "rewards/chosen": -0.11138787120580673, "rewards/margins": 0.051960915327072144, "rewards/rejected": -0.16334879398345947, "step": 540 }, { "epoch": 0.07, "learning_rate": 4.970947200069416e-06, "logits/chosen": -1.8003257513046265, "logits/rejected": -1.4926087856292725, "logps/chosen": -400.8650817871094, "logps/rejected": -480.7628479003906, "loss": 0.0196, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12577614188194275, "rewards/margins": 0.13710124790668488, "rewards/rejected": -0.2628774046897888, "step": 550 }, { "epoch": 0.07, "learning_rate": 4.967371464228096e-06, "logits/chosen": -1.72431218624115, "logits/rejected": -1.7159755229949951, "logps/chosen": -391.9646911621094, "logps/rejected": -483.2106018066406, "loss": 0.0188, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13392019271850586, "rewards/margins": 0.09055627882480621, "rewards/rejected": -0.22447650134563446, "step": 560 }, { "epoch": 0.07, "learning_rate": 4.963589703579569e-06, "logits/chosen": -1.722399353981018, "logits/rejected": -1.5180628299713135, "logps/chosen": -369.40313720703125, "logps/rejected": -437.03173828125, "loss": 0.0234, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10009882599115372, "rewards/margins": 0.13228562474250793, "rewards/rejected": -0.23238444328308105, "step": 570 }, { "epoch": 0.08, "learning_rate": 4.9596022338997615e-06, "logits/chosen": -1.7229030132293701, "logits/rejected": -1.5098745822906494, "logps/chosen": -393.1999206542969, "logps/rejected": -474.98089599609375, "loss": 0.0187, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12135627120733261, "rewards/margins": 0.09893262386322021, "rewards/rejected": -0.22028891742229462, "step": 580 }, { "epoch": 0.08, "learning_rate": 4.955409388141243e-06, "logits/chosen": -1.82937753200531, "logits/rejected": -1.7662051916122437, "logps/chosen": -357.93829345703125, "logps/rejected": -416.08880615234375, "loss": 0.0111, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09621754288673401, "rewards/margins": 0.05945245549082756, "rewards/rejected": -0.15566998720169067, "step": 590 }, { "epoch": 0.08, "learning_rate": 4.951011516405429e-06, "logits/chosen": -1.9033095836639404, "logits/rejected": -1.7660681009292603, "logps/chosen": -346.52130126953125, "logps/rejected": -452.60284423828125, "loss": 0.013, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08941993117332458, "rewards/margins": 0.1030849814414978, "rewards/rejected": -0.19250495731830597, "step": 600 }, { "epoch": 0.08, "learning_rate": 4.946408985913344e-06, "logits/chosen": -1.816651701927185, "logits/rejected": -1.7433621883392334, "logps/chosen": -372.8633728027344, "logps/rejected": -392.082275390625, "loss": 0.026, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06204792857170105, "rewards/margins": 0.05188323184847832, "rewards/rejected": -0.11393114179372787, "step": 610 }, { "epoch": 0.08, "learning_rate": 4.941602180974958e-06, "logits/chosen": -1.8630107641220093, "logits/rejected": -1.7865245342254639, "logps/chosen": -276.0709533691406, "logps/rejected": -340.4119567871094, "loss": 0.0134, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.041813433170318604, "rewards/margins": 0.07650792598724365, "rewards/rejected": -0.11832137405872345, "step": 620 }, { "epoch": 0.08, "learning_rate": 4.936591502957101e-06, "logits/chosen": -1.874721884727478, "logits/rejected": -1.6759027242660522, "logps/chosen": -276.59930419921875, "logps/rejected": -372.3610534667969, "loss": 0.0338, "rewards/accuracies": 0.625, "rewards/chosen": -0.06007235124707222, "rewards/margins": 0.10215283930301666, "rewards/rejected": -0.1622251719236374, "step": 630 }, { "epoch": 0.08, "learning_rate": 4.931377370249946e-06, "logits/chosen": -1.767024040222168, "logits/rejected": -1.5827220678329468, "logps/chosen": -356.6845397949219, "logps/rejected": -489.23895263671875, "loss": 0.0139, "rewards/accuracies": 0.75, "rewards/chosen": -0.06190893054008484, "rewards/margins": 0.10998165607452393, "rewards/rejected": -0.17189057171344757, "step": 640 }, { "epoch": 0.09, "learning_rate": 4.925960218232073e-06, "logits/chosen": -1.8132911920547485, "logits/rejected": -1.6330859661102295, "logps/chosen": -395.65020751953125, "logps/rejected": -450.646728515625, "loss": 0.0161, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0872199609875679, "rewards/margins": 0.08641021698713303, "rewards/rejected": -0.17363014817237854, "step": 650 }, { "epoch": 0.09, "learning_rate": 4.920340499234116e-06, "logits/chosen": -1.875640630722046, "logits/rejected": -1.783848524093628, "logps/chosen": -242.0182342529297, "logps/rejected": -332.5984191894531, "loss": 0.0222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03890664502978325, "rewards/margins": 0.09229302406311035, "rewards/rejected": -0.1311996728181839, "step": 660 }, { "epoch": 0.09, "learning_rate": 4.914518682500995e-06, "logits/chosen": -1.8508079051971436, "logits/rejected": -1.6670329570770264, "logps/chosen": -284.84747314453125, "logps/rejected": -354.06439208984375, "loss": 0.0171, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.041034869849681854, "rewards/margins": 0.08012250065803528, "rewards/rejected": -0.12115736305713654, "step": 670 }, { "epoch": 0.09, "learning_rate": 4.9084952541527315e-06, "logits/chosen": -1.6880519390106201, "logits/rejected": -1.5713207721710205, "logps/chosen": -281.82855224609375, "logps/rejected": -365.37884521484375, "loss": 0.0325, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05120869725942612, "rewards/margins": 0.10565266758203506, "rewards/rejected": -0.1568613499403, "step": 680 }, { "epoch": 0.09, "learning_rate": 4.902270717143858e-06, "logits/chosen": -1.9048486948013306, "logits/rejected": -1.6325836181640625, "logps/chosen": -278.3301696777344, "logps/rejected": -372.2605285644531, "loss": 0.033, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08664938062429428, "rewards/margins": 0.1053876131772995, "rewards/rejected": -0.19203698635101318, "step": 690 }, { "epoch": 0.09, "learning_rate": 4.895845591221427e-06, "logits/chosen": -1.6538032293319702, "logits/rejected": -1.490907073020935, "logps/chosen": -450.93408203125, "logps/rejected": -556.0208740234375, "loss": 0.0276, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15130817890167236, "rewards/margins": 0.11338500678539276, "rewards/rejected": -0.26469317078590393, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.8892204128816e-06, "logits/chosen": -1.7516053915023804, "logits/rejected": -1.3916836977005005, "logps/chosen": -466.16973876953125, "logps/rejected": -546.0506591796875, "loss": 0.0179, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12585867941379547, "rewards/margins": 0.13165298104286194, "rewards/rejected": -0.2575116753578186, "step": 710 }, { "epoch": 0.09, "learning_rate": 4.882395735324864e-06, "logits/chosen": -1.7523753643035889, "logits/rejected": -1.752618432044983, "logps/chosen": -324.9375305175781, "logps/rejected": -472.83349609375, "loss": 0.0153, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0883624404668808, "rewards/margins": 0.11331547796726227, "rewards/rejected": -0.20167791843414307, "step": 720 }, { "epoch": 0.1, "learning_rate": 4.87537212840983e-06, "logits/chosen": -1.9093246459960938, "logits/rejected": -1.6721271276474, "logps/chosen": -314.5921325683594, "logps/rejected": -324.62445068359375, "loss": 0.0206, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05683114007115364, "rewards/margins": 0.04478532820940018, "rewards/rejected": -0.10161645710468292, "step": 730 }, { "epoch": 0.1, "learning_rate": 4.8681501786056545e-06, "logits/chosen": -1.9189097881317139, "logits/rejected": -1.6404218673706055, "logps/chosen": -292.4906921386719, "logps/rejected": -389.42205810546875, "loss": 0.0345, "rewards/accuracies": 0.75, "rewards/chosen": -0.05244673416018486, "rewards/margins": 0.10604684054851532, "rewards/rejected": -0.15849359333515167, "step": 740 }, { "epoch": 0.1, "learning_rate": 4.860730488943068e-06, "logits/chosen": -1.7673234939575195, "logits/rejected": -1.669486403465271, "logps/chosen": -331.96820068359375, "logps/rejected": -407.38885498046875, "loss": 0.0202, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05730948597192764, "rewards/margins": 0.10414574295282364, "rewards/rejected": -0.1614551991224289, "step": 750 }, { "epoch": 0.1, "learning_rate": 4.853113678964022e-06, "logits/chosen": -1.861106514930725, "logits/rejected": -1.5694146156311035, "logps/chosen": -320.6669006347656, "logps/rejected": -389.9307556152344, "loss": 0.019, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.076021209359169, "rewards/margins": 0.110770083963871, "rewards/rejected": -0.1867913007736206, "step": 760 }, { "epoch": 0.1, "learning_rate": 4.845300384669958e-06, "logits/chosen": -1.7817033529281616, "logits/rejected": -1.6278111934661865, "logps/chosen": -286.9937744140625, "logps/rejected": -321.1000671386719, "loss": 0.0215, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05109640210866928, "rewards/margins": 0.05691806599497795, "rewards/rejected": -0.10801446437835693, "step": 770 }, { "epoch": 0.1, "learning_rate": 4.837291258468701e-06, "logits/chosen": -1.9194482564926147, "logits/rejected": -1.5813281536102295, "logps/chosen": -368.80841064453125, "logps/rejected": -447.0177307128906, "loss": 0.0237, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07502397149801254, "rewards/margins": 0.07454816997051239, "rewards/rejected": -0.14957214891910553, "step": 780 }, { "epoch": 0.1, "learning_rate": 4.829086969119984e-06, "logits/chosen": -1.73626708984375, "logits/rejected": -1.7255496978759766, "logps/chosen": -346.24310302734375, "logps/rejected": -499.1031799316406, "loss": 0.0199, "rewards/accuracies": 0.75, "rewards/chosen": -0.09376104176044464, "rewards/margins": 0.10573963075876236, "rewards/rejected": -0.1995006650686264, "step": 790 }, { "epoch": 0.1, "learning_rate": 4.820688201679605e-06, "logits/chosen": -1.758807897567749, "logits/rejected": -1.5062544345855713, "logps/chosen": -465.18487548828125, "logps/rejected": -498.99652099609375, "loss": 0.0252, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13303157687187195, "rewards/margins": 0.09037965536117554, "rewards/rejected": -0.22341123223304749, "step": 800 }, { "epoch": 0.11, "learning_rate": 4.8120956574422315e-06, "logits/chosen": -1.8504056930541992, "logits/rejected": -1.747534990310669, "logps/chosen": -319.843505859375, "logps/rejected": -448.572998046875, "loss": 0.0269, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12760835886001587, "rewards/margins": 0.12799301743507385, "rewards/rejected": -0.2556013762950897, "step": 810 }, { "epoch": 0.11, "learning_rate": 4.803310053882831e-06, "logits/chosen": -1.9025465250015259, "logits/rejected": -1.6481679677963257, "logps/chosen": -379.5039978027344, "logps/rejected": -426.4248046875, "loss": 0.0174, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0971316546201706, "rewards/margins": 0.07221867889165878, "rewards/rejected": -0.16935034096240997, "step": 820 }, { "epoch": 0.11, "learning_rate": 4.794332124596775e-06, "logits/chosen": -1.8077484369277954, "logits/rejected": -1.4728492498397827, "logps/chosen": -365.80328369140625, "logps/rejected": -466.0181579589844, "loss": 0.0187, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11555298417806625, "rewards/margins": 0.11450506746768951, "rewards/rejected": -0.23005802929401398, "step": 830 }, { "epoch": 0.11, "learning_rate": 4.785162619238575e-06, "logits/chosen": -1.8451635837554932, "logits/rejected": -1.6091960668563843, "logps/chosen": -397.75103759765625, "logps/rejected": -431.52362060546875, "loss": 0.0087, "rewards/accuracies": 0.625, "rewards/chosen": -0.1329265832901001, "rewards/margins": 0.08711318671703339, "rewards/rejected": -0.2200397551059723, "step": 840 }, { "epoch": 0.11, "learning_rate": 4.775802303459288e-06, "logits/chosen": -1.7575900554656982, "logits/rejected": -1.6303355693817139, "logps/chosen": -477.674560546875, "logps/rejected": -596.478515625, "loss": 0.0169, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18462693691253662, "rewards/margins": 0.11819438636302948, "rewards/rejected": -0.3028213381767273, "step": 850 }, { "epoch": 0.11, "learning_rate": 4.766251958842589e-06, "logits/chosen": -1.8808314800262451, "logits/rejected": -1.745133638381958, "logps/chosen": -406.09649658203125, "logps/rejected": -445.7308044433594, "loss": 0.0169, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1498914510011673, "rewards/margins": 0.056582141667604446, "rewards/rejected": -0.20647358894348145, "step": 860 }, { "epoch": 0.11, "learning_rate": 4.7565123828395066e-06, "logits/chosen": -1.7529296875, "logits/rejected": -1.5742405652999878, "logps/chosen": -406.1771240234375, "logps/rejected": -534.1820068359375, "loss": 0.0164, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13195540010929108, "rewards/margins": 0.0883890837430954, "rewards/rejected": -0.22034449875354767, "step": 870 }, { "epoch": 0.12, "learning_rate": 4.746584388701831e-06, "logits/chosen": -2.031038761138916, "logits/rejected": -1.8793842792510986, "logps/chosen": -358.472412109375, "logps/rejected": -407.47406005859375, "loss": 0.022, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07657279074192047, "rewards/margins": 0.05360488221049309, "rewards/rejected": -0.13017769157886505, "step": 880 }, { "epoch": 0.12, "learning_rate": 4.736468805414218e-06, "logits/chosen": -1.9064619541168213, "logits/rejected": -1.470027208328247, "logps/chosen": -382.59234619140625, "logps/rejected": -381.2503356933594, "loss": 0.0436, "rewards/accuracies": 0.625, "rewards/chosen": -0.0828360766172409, "rewards/margins": 0.07405532896518707, "rewards/rejected": -0.15689142048358917, "step": 890 }, { "epoch": 0.12, "learning_rate": 4.7261664776249595e-06, "logits/chosen": -1.8827707767486572, "logits/rejected": -1.6599212884902954, "logps/chosen": -344.37445068359375, "logps/rejected": -451.327880859375, "loss": 0.0133, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07973988354206085, "rewards/margins": 0.09898734837770462, "rewards/rejected": -0.17872723937034607, "step": 900 }, { "epoch": 0.12, "learning_rate": 4.715678265575463e-06, "logits/chosen": -1.7086706161499023, "logits/rejected": -1.6703531742095947, "logps/chosen": -314.4049072265625, "logps/rejected": -450.04302978515625, "loss": 0.0147, "rewards/accuracies": 0.5, "rewards/chosen": -0.09818287193775177, "rewards/margins": 0.09443382918834686, "rewards/rejected": -0.19261668622493744, "step": 910 }, { "epoch": 0.12, "learning_rate": 4.705005045028415e-06, "logits/chosen": -1.662671685218811, "logits/rejected": -1.27628493309021, "logps/chosen": -454.2522888183594, "logps/rejected": -522.2706298828125, "loss": 0.0237, "rewards/accuracies": 0.625, "rewards/chosen": -0.13431644439697266, "rewards/margins": 0.0876445323228836, "rewards/rejected": -0.22196097671985626, "step": 920 }, { "epoch": 0.12, "learning_rate": 4.694147707194659e-06, "logits/chosen": -1.7272052764892578, "logits/rejected": -1.5347228050231934, "logps/chosen": -317.9644470214844, "logps/rejected": -497.8070373535156, "loss": 0.0252, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12189044058322906, "rewards/margins": 0.15914377570152283, "rewards/rejected": -0.2810341715812683, "step": 930 }, { "epoch": 0.12, "learning_rate": 4.683107158658782e-06, "logits/chosen": -1.8561426401138306, "logits/rejected": -1.464103102684021, "logps/chosen": -412.8878479003906, "logps/rejected": -458.0304260253906, "loss": 0.022, "rewards/accuracies": 0.625, "rewards/chosen": -0.16590335965156555, "rewards/margins": 0.10721351951360703, "rewards/rejected": -0.2731168866157532, "step": 940 }, { "epoch": 0.12, "learning_rate": 4.671884321303407e-06, "logits/chosen": -1.9141470193862915, "logits/rejected": -1.7067787647247314, "logps/chosen": -339.55609130859375, "logps/rejected": -431.93719482421875, "loss": 0.02, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12204339355230331, "rewards/margins": 0.11927355825901031, "rewards/rejected": -0.24131695926189423, "step": 950 }, { "epoch": 0.13, "learning_rate": 4.660480132232224e-06, "logits/chosen": -1.8624728918075562, "logits/rejected": -1.6955925226211548, "logps/chosen": -360.1496887207031, "logps/rejected": -473.90521240234375, "loss": 0.0153, "rewards/accuracies": 0.625, "rewards/chosen": -0.10254975408315659, "rewards/margins": 0.0967218279838562, "rewards/rejected": -0.1992715746164322, "step": 960 }, { "epoch": 0.13, "learning_rate": 4.6488955436917414e-06, "logits/chosen": -1.889905571937561, "logits/rejected": -1.6735109090805054, "logps/chosen": -357.2720031738281, "logps/rejected": -446.41552734375, "loss": 0.0102, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09995651245117188, "rewards/margins": 0.09079816192388535, "rewards/rejected": -0.19075465202331543, "step": 970 }, { "epoch": 0.13, "learning_rate": 4.6371315229917644e-06, "logits/chosen": -1.7173998355865479, "logits/rejected": -1.5686781406402588, "logps/chosen": -415.00433349609375, "logps/rejected": -455.44580078125, "loss": 0.0253, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1351235806941986, "rewards/margins": 0.07777445763349533, "rewards/rejected": -0.21289804577827454, "step": 980 }, { "epoch": 0.13, "learning_rate": 4.625189052424638e-06, "logits/chosen": -1.7635847330093384, "logits/rejected": -1.6083520650863647, "logps/chosen": -417.59405517578125, "logps/rejected": -563.2052001953125, "loss": 0.0347, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13559845089912415, "rewards/margins": 0.14921309053897858, "rewards/rejected": -0.2848115563392639, "step": 990 }, { "epoch": 0.13, "learning_rate": 4.613069129183218e-06, "logits/chosen": -1.7733408212661743, "logits/rejected": -1.5813791751861572, "logps/chosen": -448.42071533203125, "logps/rejected": -528.0146484375, "loss": 0.0222, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12599460780620575, "rewards/margins": 0.08608383685350418, "rewards/rejected": -0.21207845211029053, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.600772765277607e-06, "logits/chosen": -1.856818437576294, "logits/rejected": -1.665952444076538, "logps/chosen": -389.7002868652344, "logps/rejected": -465.99835205078125, "loss": 0.0168, "rewards/accuracies": 0.75, "rewards/chosen": -0.10839100927114487, "rewards/margins": 0.08755466341972351, "rewards/rejected": -0.19594568014144897, "step": 1010 }, { "epoch": 0.13, "learning_rate": 4.588300987450652e-06, "logits/chosen": -1.8819538354873657, "logits/rejected": -1.7245972156524658, "logps/chosen": -426.298095703125, "logps/rejected": -529.8442993164062, "loss": 0.016, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1245245486497879, "rewards/margins": 0.10625261068344116, "rewards/rejected": -0.23077717423439026, "step": 1020 }, { "epoch": 0.13, "learning_rate": 4.5756548370922136e-06, "logits/chosen": -1.897899866104126, "logits/rejected": -1.9127979278564453, "logps/chosen": -354.75347900390625, "logps/rejected": -436.00433349609375, "loss": 0.0172, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0811559334397316, "rewards/margins": 0.07884221524000168, "rewards/rejected": -0.15999814867973328, "step": 1030 }, { "epoch": 0.14, "learning_rate": 4.562835370152206e-06, "logits/chosen": -1.8487708568572998, "logits/rejected": -1.7927992343902588, "logps/chosen": -386.8661193847656, "logps/rejected": -424.00579833984375, "loss": 0.0255, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.09799676388502121, "rewards/margins": 0.03722173348069191, "rewards/rejected": -0.13521848618984222, "step": 1040 }, { "epoch": 0.14, "learning_rate": 4.54984365705243e-06, "logits/chosen": -1.8459587097167969, "logits/rejected": -1.7035545110702515, "logps/chosen": -312.6791687011719, "logps/rejected": -409.29156494140625, "loss": 0.0276, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08885835111141205, "rewards/margins": 0.10372103750705719, "rewards/rejected": -0.19257937371730804, "step": 1050 }, { "epoch": 0.14, "learning_rate": 4.536680782597191e-06, "logits/chosen": -1.5797417163848877, "logits/rejected": -1.7094873189926147, "logps/chosen": -346.49139404296875, "logps/rejected": -525.2951049804688, "loss": 0.0195, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09388957172632217, "rewards/margins": 0.13888582587242126, "rewards/rejected": -0.23277540504932404, "step": 1060 }, { "epoch": 0.14, "learning_rate": 4.523347845882718e-06, "logits/chosen": -1.772716760635376, "logits/rejected": -1.7684208154678345, "logps/chosen": -467.524658203125, "logps/rejected": -579.660888671875, "loss": 0.0238, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1878214180469513, "rewards/margins": 0.07850656658411026, "rewards/rejected": -0.26632797718048096, "step": 1070 }, { "epoch": 0.14, "learning_rate": 4.50984596020539e-06, "logits/chosen": -1.9691396951675415, "logits/rejected": -1.7679029703140259, "logps/chosen": -455.80364990234375, "logps/rejected": -545.2230224609375, "loss": 0.0187, "rewards/accuracies": 0.75, "rewards/chosen": -0.1325797587633133, "rewards/margins": 0.11153139173984528, "rewards/rejected": -0.24411115050315857, "step": 1080 }, { "epoch": 0.14, "learning_rate": 4.4961762529687745e-06, "logits/chosen": -1.82855224609375, "logits/rejected": -1.646068811416626, "logps/chosen": -376.3056945800781, "logps/rejected": -422.94622802734375, "loss": 0.0141, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12117953598499298, "rewards/margins": 0.0768873319029808, "rewards/rejected": -0.19806687533855438, "step": 1090 }, { "epoch": 0.14, "learning_rate": 4.482339865589492e-06, "logits/chosen": -2.0833468437194824, "logits/rejected": -1.731947660446167, "logps/chosen": -394.7748718261719, "logps/rejected": -430.6640625, "loss": 0.022, "rewards/accuracies": 0.625, "rewards/chosen": -0.1216883510351181, "rewards/margins": 0.0657147616147995, "rewards/rejected": -0.1874031126499176, "step": 1100 }, { "epoch": 0.15, "learning_rate": 4.468337953401909e-06, "logits/chosen": -1.8922927379608154, "logits/rejected": -1.6288894414901733, "logps/chosen": -419.62744140625, "logps/rejected": -483.31561279296875, "loss": 0.0168, "rewards/accuracies": 0.625, "rewards/chosen": -0.12291643768548965, "rewards/margins": 0.10072420537471771, "rewards/rejected": -0.22364065051078796, "step": 1110 }, { "epoch": 0.15, "learning_rate": 4.45417168556166e-06, "logits/chosen": -1.717938780784607, "logits/rejected": -1.6389567852020264, "logps/chosen": -448.9976501464844, "logps/rejected": -485.7889709472656, "loss": 0.0201, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11510507762432098, "rewards/margins": 0.061794381588697433, "rewards/rejected": -0.17689943313598633, "step": 1120 }, { "epoch": 0.15, "learning_rate": 4.439842244948036e-06, "logits/chosen": -1.8700908422470093, "logits/rejected": -1.5595957040786743, "logps/chosen": -348.52313232421875, "logps/rejected": -400.2507019042969, "loss": 0.0175, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10297670215368271, "rewards/margins": 0.08136579394340515, "rewards/rejected": -0.18434248864650726, "step": 1130 }, { "epoch": 0.15, "learning_rate": 4.425350828065204e-06, "logits/chosen": -1.9104478359222412, "logits/rejected": -1.7754993438720703, "logps/chosen": -400.5302734375, "logps/rejected": -461.37353515625, "loss": 0.0209, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10379412025213242, "rewards/margins": 0.07275176048278809, "rewards/rejected": -0.1765458881855011, "step": 1140 }, { "epoch": 0.15, "learning_rate": 4.410698644942303e-06, "logits/chosen": -1.8551292419433594, "logits/rejected": -1.7061628103256226, "logps/chosen": -378.5152587890625, "logps/rejected": -515.3646850585938, "loss": 0.0197, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12428358942270279, "rewards/margins": 0.10639438778162003, "rewards/rejected": -0.2306780070066452, "step": 1150 }, { "epoch": 0.15, "learning_rate": 4.395886919032406e-06, "logits/chosen": -1.6530601978302002, "logits/rejected": -1.592722773551941, "logps/chosen": -371.8653869628906, "logps/rejected": -539.634765625, "loss": 0.0145, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11384312063455582, "rewards/margins": 0.14821864664554596, "rewards/rejected": -0.2620617747306824, "step": 1160 }, { "epoch": 0.15, "learning_rate": 4.380916887110366e-06, "logits/chosen": -1.9166767597198486, "logits/rejected": -1.728567361831665, "logps/chosen": -330.86578369140625, "logps/rejected": -428.4871520996094, "loss": 0.0144, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.10327453911304474, "rewards/margins": 0.0963578075170517, "rewards/rejected": -0.19963231682777405, "step": 1170 }, { "epoch": 0.15, "learning_rate": 4.365789799169539e-06, "logits/chosen": -1.8074811697006226, "logits/rejected": -1.7127392292022705, "logps/chosen": -347.7506103515625, "logps/rejected": -427.75750732421875, "loss": 0.0219, "rewards/accuracies": 0.5, "rewards/chosen": -0.10831247270107269, "rewards/margins": 0.09210260212421417, "rewards/rejected": -0.20041505992412567, "step": 1180 }, { "epoch": 0.16, "learning_rate": 4.350506918317416e-06, "logits/chosen": -1.8168857097625732, "logits/rejected": -1.6645673513412476, "logps/chosen": -386.88677978515625, "logps/rejected": -446.7989196777344, "loss": 0.0196, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11320777237415314, "rewards/margins": 0.06845235824584961, "rewards/rejected": -0.18166013062000275, "step": 1190 }, { "epoch": 0.16, "learning_rate": 4.335069520670149e-06, "logits/chosen": -1.9594255685806274, "logits/rejected": -1.8522495031356812, "logps/chosen": -354.8717346191406, "logps/rejected": -420.1785583496094, "loss": 0.0288, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11588549613952637, "rewards/margins": 0.05756071209907532, "rewards/rejected": -0.17344620823860168, "step": 1200 }, { "epoch": 0.16, "learning_rate": 4.319478895246e-06, "logits/chosen": -1.9707610607147217, "logits/rejected": -1.5603879690170288, "logps/chosen": -461.23779296875, "logps/rejected": -534.6307983398438, "loss": 0.0202, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11856283992528915, "rewards/margins": 0.11594156175851822, "rewards/rejected": -0.23450438678264618, "step": 1210 }, { "epoch": 0.16, "learning_rate": 4.303736343857704e-06, "logits/chosen": -2.092125177383423, "logits/rejected": -1.7407474517822266, "logps/chosen": -465.5767517089844, "logps/rejected": -499.806396484375, "loss": 0.0237, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1197667345404625, "rewards/margins": 0.06783206760883331, "rewards/rejected": -0.1875987946987152, "step": 1220 }, { "epoch": 0.16, "learning_rate": 4.287843181003772e-06, "logits/chosen": -1.7572336196899414, "logits/rejected": -1.7284873723983765, "logps/chosen": -410.6771545410156, "logps/rejected": -541.9444580078125, "loss": 0.0168, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13083671033382416, "rewards/margins": 0.08576327562332153, "rewards/rejected": -0.2165999859571457, "step": 1230 }, { "epoch": 0.16, "learning_rate": 4.27180073375873e-06, "logits/chosen": -1.9774229526519775, "logits/rejected": -1.583739995956421, "logps/chosen": -443.5270080566406, "logps/rejected": -479.5216369628906, "loss": 0.0112, "rewards/accuracies": 0.75, "rewards/chosen": -0.13674288988113403, "rewards/margins": 0.10858182609081268, "rewards/rejected": -0.24532470107078552, "step": 1240 }, { "epoch": 0.16, "learning_rate": 4.255610341662304e-06, "logits/chosen": -1.7707653045654297, "logits/rejected": -1.8004286289215088, "logps/chosen": -317.6645202636719, "logps/rejected": -426.2616271972656, "loss": 0.0273, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12054991722106934, "rewards/margins": 0.0729142427444458, "rewards/rejected": -0.19346415996551514, "step": 1250 }, { "epoch": 0.16, "learning_rate": 4.2392733566075764e-06, "logits/chosen": -1.9997937679290771, "logits/rejected": -1.7775239944458008, "logps/chosen": -362.96405029296875, "logps/rejected": -484.82037353515625, "loss": 0.0152, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12314033508300781, "rewards/margins": 0.10408929735422134, "rewards/rejected": -0.22722962498664856, "step": 1260 }, { "epoch": 0.17, "learning_rate": 4.2227911427280975e-06, "logits/chosen": -1.9061148166656494, "logits/rejected": -1.4991739988327026, "logps/chosen": -378.03851318359375, "logps/rejected": -469.50811767578125, "loss": 0.0138, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1264100968837738, "rewards/margins": 0.1268596053123474, "rewards/rejected": -0.2532697021961212, "step": 1270 }, { "epoch": 0.17, "learning_rate": 4.206165076283983e-06, "logits/chosen": -1.8601776361465454, "logits/rejected": -1.7021329402923584, "logps/chosen": -407.5594177246094, "logps/rejected": -465.5216369628906, "loss": 0.017, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13072499632835388, "rewards/margins": 0.05420677736401558, "rewards/rejected": -0.18493175506591797, "step": 1280 }, { "epoch": 0.17, "learning_rate": 4.189396545546995e-06, "logits/chosen": -1.973583459854126, "logits/rejected": -1.6353248357772827, "logps/chosen": -381.9681701660156, "logps/rejected": -459.086669921875, "loss": 0.0241, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13368123769760132, "rewards/margins": 0.11105000972747803, "rewards/rejected": -0.24473123252391815, "step": 1290 }, { "epoch": 0.17, "learning_rate": 4.172486950684627e-06, "logits/chosen": -1.8096119165420532, "logits/rejected": -1.9033712148666382, "logps/chosen": -397.31658935546875, "logps/rejected": -575.7694702148438, "loss": 0.0121, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15405425429344177, "rewards/margins": 0.11879728734493256, "rewards/rejected": -0.2728515565395355, "step": 1300 }, { "epoch": 0.17, "learning_rate": 4.155437703643182e-06, "logits/chosen": -1.6964061260223389, "logits/rejected": -1.7630592584609985, "logps/chosen": -354.87554931640625, "logps/rejected": -480.3094177246094, "loss": 0.0392, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10775692760944366, "rewards/margins": 0.11167238652706146, "rewards/rejected": -0.21942932903766632, "step": 1310 }, { "epoch": 0.17, "learning_rate": 4.138250228029882e-06, "logits/chosen": -1.8949930667877197, "logits/rejected": -1.7675005197525024, "logps/chosen": -404.69085693359375, "logps/rejected": -477.4461975097656, "loss": 0.0168, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10534077882766724, "rewards/margins": 0.1056976318359375, "rewards/rejected": -0.21103842556476593, "step": 1320 }, { "epoch": 0.17, "learning_rate": 4.120925958993994e-06, "logits/chosen": -1.8363186120986938, "logits/rejected": -1.4940989017486572, "logps/chosen": -358.0071716308594, "logps/rejected": -492.396484375, "loss": 0.0417, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11532169580459595, "rewards/margins": 0.14440791308879852, "rewards/rejected": -0.2597295939922333, "step": 1330 }, { "epoch": 0.18, "learning_rate": 4.103466343106999e-06, "logits/chosen": -1.7425540685653687, "logits/rejected": -1.6223710775375366, "logps/chosen": -386.5481872558594, "logps/rejected": -500.7713928222656, "loss": 0.0191, "rewards/accuracies": 0.75, "rewards/chosen": -0.13689905405044556, "rewards/margins": 0.11216177046298981, "rewards/rejected": -0.24906082451343536, "step": 1340 }, { "epoch": 0.18, "learning_rate": 4.085872838241797e-06, "logits/chosen": -1.898192048072815, "logits/rejected": -1.599747657775879, "logps/chosen": -428.51336669921875, "logps/rejected": -457.18951416015625, "loss": 0.0223, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13870571553707123, "rewards/margins": 0.08469250798225403, "rewards/rejected": -0.22339823842048645, "step": 1350 }, { "epoch": 0.18, "learning_rate": 4.06814691345098e-06, "logits/chosen": -2.190451145172119, "logits/rejected": -1.8823210000991821, "logps/chosen": -451.5748596191406, "logps/rejected": -466.34649658203125, "loss": 0.0172, "rewards/accuracies": 0.625, "rewards/chosen": -0.13856498897075653, "rewards/margins": 0.0767131820321083, "rewards/rejected": -0.21527817845344543, "step": 1360 }, { "epoch": 0.18, "learning_rate": 4.050290048844171e-06, "logits/chosen": -1.8555920124053955, "logits/rejected": -1.664833426475525, "logps/chosen": -517.3372192382812, "logps/rejected": -568.2362060546875, "loss": 0.0148, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18789918720722198, "rewards/margins": 0.06272252649068832, "rewards/rejected": -0.2506217062473297, "step": 1370 }, { "epoch": 0.18, "learning_rate": 4.032303735464422e-06, "logits/chosen": -1.5974675416946411, "logits/rejected": -1.5481300354003906, "logps/chosen": -415.5513610839844, "logps/rejected": -624.524658203125, "loss": 0.028, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2292645424604416, "rewards/margins": 0.12871482968330383, "rewards/rejected": -0.3579793870449066, "step": 1380 }, { "epoch": 0.18, "learning_rate": 4.014189475163727e-06, "logits/chosen": -1.7195323705673218, "logits/rejected": -1.777994155883789, "logps/chosen": -394.39093017578125, "logps/rejected": -482.6665954589844, "loss": 0.0208, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15285815298557281, "rewards/margins": 0.07930557429790497, "rewards/rejected": -0.2321637123823166, "step": 1390 }, { "epoch": 0.18, "learning_rate": 3.995948780477605e-06, "logits/chosen": -1.8426777124404907, "logits/rejected": -1.4613826274871826, "logps/chosen": -432.38787841796875, "logps/rejected": -466.80828857421875, "loss": 0.0175, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13188788294792175, "rewards/margins": 0.08885625749826431, "rewards/rejected": -0.22074413299560547, "step": 1400 }, { "epoch": 0.18, "learning_rate": 3.977583174498816e-06, "logits/chosen": -1.8091497421264648, "logits/rejected": -1.7824805974960327, "logps/chosen": -322.8439025878906, "logps/rejected": -434.02557373046875, "loss": 0.0207, "rewards/accuracies": 0.625, "rewards/chosen": -0.1279405653476715, "rewards/margins": 0.09352116286754608, "rewards/rejected": -0.2214617282152176, "step": 1410 }, { "epoch": 0.19, "learning_rate": 3.959094190750172e-06, "logits/chosen": -1.9188625812530518, "logits/rejected": -1.8636420965194702, "logps/chosen": -307.5054626464844, "logps/rejected": -446.928466796875, "loss": 0.0145, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09864123165607452, "rewards/margins": 0.11922135204076767, "rewards/rejected": -0.2178625762462616, "step": 1420 }, { "epoch": 0.19, "learning_rate": 3.9404833730564975e-06, "logits/chosen": -1.8266137838363647, "logits/rejected": -1.4754563570022583, "logps/chosen": -345.11956787109375, "logps/rejected": -480.0924377441406, "loss": 0.0129, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11703918874263763, "rewards/margins": 0.14104144275188446, "rewards/rejected": -0.2580806314945221, "step": 1430 }, { "epoch": 0.19, "learning_rate": 3.921752275415712e-06, "logits/chosen": -1.7833318710327148, "logits/rejected": -1.6791677474975586, "logps/chosen": -395.7652282714844, "logps/rejected": -446.3226013183594, "loss": 0.0157, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13844263553619385, "rewards/margins": 0.05796321481466293, "rewards/rejected": -0.19640584290027618, "step": 1440 }, { "epoch": 0.19, "learning_rate": 3.902902461869079e-06, "logits/chosen": -1.9195261001586914, "logits/rejected": -1.4448989629745483, "logps/chosen": -431.9091796875, "logps/rejected": -504.08099365234375, "loss": 0.0292, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16437889635562897, "rewards/margins": 0.12609624862670898, "rewards/rejected": -0.29047515988349915, "step": 1450 }, { "epoch": 0.19, "learning_rate": 3.883935506370605e-06, "logits/chosen": -1.618941307067871, "logits/rejected": -1.6792643070220947, "logps/chosen": -504.3912048339844, "logps/rejected": -667.5081176757812, "loss": 0.0154, "rewards/accuracies": 0.625, "rewards/chosen": -0.21429996192455292, "rewards/margins": 0.0895891785621643, "rewards/rejected": -0.3038891553878784, "step": 1460 }, { "epoch": 0.19, "learning_rate": 3.864852992655617e-06, "logits/chosen": -1.925387978553772, "logits/rejected": -1.6994163990020752, "logps/chosen": -492.82781982421875, "logps/rejected": -554.85302734375, "loss": 0.0205, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15964598953723907, "rewards/margins": 0.09300161153078079, "rewards/rejected": -0.2526475489139557, "step": 1470 }, { "epoch": 0.19, "learning_rate": 3.845656514108516e-06, "logits/chosen": -1.8864777088165283, "logits/rejected": -1.7926757335662842, "logps/chosen": -450.853759765625, "logps/rejected": -575.7086181640625, "loss": 0.0132, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1719667613506317, "rewards/margins": 0.09205961972475052, "rewards/rejected": -0.26402637362480164, "step": 1480 }, { "epoch": 0.19, "learning_rate": 3.826347673629738e-06, "logits/chosen": -1.9465141296386719, "logits/rejected": -1.6575359106063843, "logps/chosen": -366.102783203125, "logps/rejected": -443.8168029785156, "loss": 0.0164, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16453438997268677, "rewards/margins": 0.07573465257883072, "rewards/rejected": -0.2402690351009369, "step": 1490 }, { "epoch": 0.2, "learning_rate": 3.8069280835019062e-06, "logits/chosen": -1.933641791343689, "logits/rejected": -1.8136193752288818, "logps/chosen": -462.89263916015625, "logps/rejected": -517.4881591796875, "loss": 0.0131, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16120457649230957, "rewards/margins": 0.07937152683734894, "rewards/rejected": -0.2405761033296585, "step": 1500 }, { "epoch": 0.2, "learning_rate": 3.7873993652552077e-06, "logits/chosen": -2.0754313468933105, "logits/rejected": -1.9829819202423096, "logps/chosen": -406.0433349609375, "logps/rejected": -498.377685546875, "loss": 0.0217, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1559891402721405, "rewards/margins": 0.10568322241306305, "rewards/rejected": -0.26167237758636475, "step": 1510 }, { "epoch": 0.2, "learning_rate": 3.7677631495319953e-06, "logits/chosen": -1.8346569538116455, "logits/rejected": -1.4804002046585083, "logps/chosen": -430.29656982421875, "logps/rejected": -427.82659912109375, "loss": 0.0191, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16190122067928314, "rewards/margins": 0.04960303753614426, "rewards/rejected": -0.2115042507648468, "step": 1520 }, { "epoch": 0.2, "learning_rate": 3.748021075950633e-06, "logits/chosen": -1.5542857646942139, "logits/rejected": -1.6141067743301392, "logps/chosen": -390.5488586425781, "logps/rejected": -528.735595703125, "loss": 0.0252, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1550368368625641, "rewards/margins": 0.09110010415315628, "rewards/rejected": -0.24613693356513977, "step": 1530 }, { "epoch": 0.2, "learning_rate": 3.7281747929685824e-06, "logits/chosen": -1.755401611328125, "logits/rejected": -1.6087324619293213, "logps/chosen": -489.2933654785156, "logps/rejected": -585.9915771484375, "loss": 0.0188, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1787690669298172, "rewards/margins": 0.10598522424697876, "rewards/rejected": -0.28475427627563477, "step": 1540 }, { "epoch": 0.2, "learning_rate": 3.7082259577447604e-06, "logits/chosen": -1.877626657485962, "logits/rejected": -1.9195016622543335, "logps/chosen": -454.4300231933594, "logps/rejected": -595.6709594726562, "loss": 0.0319, "rewards/accuracies": 0.75, "rewards/chosen": -0.16006502509117126, "rewards/margins": 0.11250058561563492, "rewards/rejected": -0.2725656032562256, "step": 1550 }, { "epoch": 0.2, "learning_rate": 3.6881762360011688e-06, "logits/chosen": -1.8989111185073853, "logits/rejected": -1.555864691734314, "logps/chosen": -420.7596130371094, "logps/rejected": -494.38348388671875, "loss": 0.0163, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14275918900966644, "rewards/margins": 0.1062660813331604, "rewards/rejected": -0.24902530014514923, "step": 1560 }, { "epoch": 0.21, "learning_rate": 3.668027301883802e-06, "logits/chosen": -1.827575922012329, "logits/rejected": -1.6837794780731201, "logps/chosen": -395.3829650878906, "logps/rejected": -443.8783264160156, "loss": 0.0252, "rewards/accuracies": 0.625, "rewards/chosen": -0.1571020483970642, "rewards/margins": 0.062250684946775436, "rewards/rejected": -0.21935272216796875, "step": 1570 }, { "epoch": 0.21, "learning_rate": 3.64778083782286e-06, "logits/chosen": -1.9678294658660889, "logits/rejected": -1.7185138463974, "logps/chosen": -478.1893615722656, "logps/rejected": -538.3648681640625, "loss": 0.017, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16626183688640594, "rewards/margins": 0.07816623896360397, "rewards/rejected": -0.24442806839942932, "step": 1580 }, { "epoch": 0.21, "learning_rate": 3.627438534392268e-06, "logits/chosen": -1.7632687091827393, "logits/rejected": -1.5381275415420532, "logps/chosen": -394.9458312988281, "logps/rejected": -492.0277404785156, "loss": 0.0221, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1442459225654602, "rewards/margins": 0.12947799265384674, "rewards/rejected": -0.27372390031814575, "step": 1590 }, { "epoch": 0.21, "learning_rate": 3.607002090168506e-06, "logits/chosen": -1.803227424621582, "logits/rejected": -1.702261209487915, "logps/chosen": -374.3648681640625, "logps/rejected": -477.6402282714844, "loss": 0.0292, "rewards/accuracies": 0.625, "rewards/chosen": -0.15046299993991852, "rewards/margins": 0.08392789214849472, "rewards/rejected": -0.23439089953899384, "step": 1600 }, { "epoch": 0.21, "learning_rate": 3.586473211588787e-06, "logits/chosen": -1.6564468145370483, "logits/rejected": -1.7320528030395508, "logps/chosen": -360.9053649902344, "logps/rejected": -533.3809814453125, "loss": 0.0202, "rewards/accuracies": 0.625, "rewards/chosen": -0.14958436787128448, "rewards/margins": 0.12201867997646332, "rewards/rejected": -0.2716030478477478, "step": 1610 }, { "epoch": 0.21, "learning_rate": 3.5658536128085623e-06, "logits/chosen": -1.7816816568374634, "logits/rejected": -1.6505823135375977, "logps/chosen": -390.59210205078125, "logps/rejected": -493.36920166015625, "loss": 0.016, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1287616640329361, "rewards/margins": 0.10660900175571442, "rewards/rejected": -0.2353706657886505, "step": 1620 }, { "epoch": 0.21, "learning_rate": 3.545145015558399e-06, "logits/chosen": -1.9129045009613037, "logits/rejected": -1.6966536045074463, "logps/chosen": -387.6809387207031, "logps/rejected": -403.5453796386719, "loss": 0.0245, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11970832198858261, "rewards/margins": 0.07347720116376877, "rewards/rejected": -0.19318550825119019, "step": 1630 }, { "epoch": 0.21, "learning_rate": 3.5243491490002056e-06, "logits/chosen": -1.7627900838851929, "logits/rejected": -1.5936365127563477, "logps/chosen": -400.5910949707031, "logps/rejected": -447.85986328125, "loss": 0.0204, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13534307479858398, "rewards/margins": 0.06724969297647476, "rewards/rejected": -0.20259277522563934, "step": 1640 }, { "epoch": 0.22, "learning_rate": 3.503467749582857e-06, "logits/chosen": -1.931465744972229, "logits/rejected": -1.7324622869491577, "logps/chosen": -338.14544677734375, "logps/rejected": -398.4444274902344, "loss": 0.0281, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11063027381896973, "rewards/margins": 0.07334277033805847, "rewards/rejected": -0.1839730441570282, "step": 1650 }, { "epoch": 0.22, "learning_rate": 3.4825025608971947e-06, "logits/chosen": -1.9261465072631836, "logits/rejected": -1.5876089334487915, "logps/chosen": -400.53057861328125, "logps/rejected": -494.91400146484375, "loss": 0.0254, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09498036652803421, "rewards/margins": 0.11777050793170929, "rewards/rejected": -0.2127508670091629, "step": 1660 }, { "epoch": 0.22, "learning_rate": 3.4614553335304407e-06, "logits/chosen": -2.022671937942505, "logits/rejected": -1.721441626548767, "logps/chosen": -451.26434326171875, "logps/rejected": -471.126708984375, "loss": 0.016, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09799091517925262, "rewards/margins": 0.06697054952383041, "rewards/rejected": -0.16496145725250244, "step": 1670 }, { "epoch": 0.22, "learning_rate": 3.4403278249200222e-06, "logits/chosen": -1.8817470073699951, "logits/rejected": -1.8598620891571045, "logps/chosen": -347.35015869140625, "logps/rejected": -427.130615234375, "loss": 0.0193, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09004762023687363, "rewards/margins": 0.0678776204586029, "rewards/rejected": -0.15792521834373474, "step": 1680 }, { "epoch": 0.22, "learning_rate": 3.4191217992068293e-06, "logits/chosen": -1.877916693687439, "logits/rejected": -1.697608232498169, "logps/chosen": -334.14239501953125, "logps/rejected": -372.6851806640625, "loss": 0.0148, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0950525626540184, "rewards/margins": 0.05130559951066971, "rewards/rejected": -0.1463581770658493, "step": 1690 }, { "epoch": 0.22, "learning_rate": 3.3978390270879056e-06, "logits/chosen": -2.017803192138672, "logits/rejected": -1.8038629293441772, "logps/chosen": -481.0552673339844, "logps/rejected": -584.086181640625, "loss": 0.0155, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11839693784713745, "rewards/margins": 0.09097306430339813, "rewards/rejected": -0.2093699872493744, "step": 1700 }, { "epoch": 0.22, "learning_rate": 3.3764812856685995e-06, "logits/chosen": -1.9411423206329346, "logits/rejected": -1.8091685771942139, "logps/chosen": -382.48443603515625, "logps/rejected": -395.9397277832031, "loss": 0.0267, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.12482907623052597, "rewards/margins": 0.035141877830028534, "rewards/rejected": -0.1599709540605545, "step": 1710 }, { "epoch": 0.23, "learning_rate": 3.3550503583141726e-06, "logits/chosen": -1.8497459888458252, "logits/rejected": -1.7582515478134155, "logps/chosen": -358.4706115722656, "logps/rejected": -515.0752563476562, "loss": 0.0251, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1583252251148224, "rewards/margins": 0.12078104168176651, "rewards/rejected": -0.2791062593460083, "step": 1720 }, { "epoch": 0.23, "learning_rate": 3.3335480345008907e-06, "logits/chosen": -1.73007071018219, "logits/rejected": -1.5075857639312744, "logps/chosen": -448.3133850097656, "logps/rejected": -479.6458435058594, "loss": 0.0192, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1550527662038803, "rewards/margins": 0.05522305890917778, "rewards/rejected": -0.21027584373950958, "step": 1730 }, { "epoch": 0.23, "learning_rate": 3.3119761096666055e-06, "logits/chosen": -1.702124834060669, "logits/rejected": -1.4853382110595703, "logps/chosen": -402.51995849609375, "logps/rejected": -460.457275390625, "loss": 0.0232, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14261862635612488, "rewards/margins": 0.09650713950395584, "rewards/rejected": -0.23912575840950012, "step": 1740 }, { "epoch": 0.23, "learning_rate": 3.290336385060832e-06, "logits/chosen": -1.6065213680267334, "logits/rejected": -1.573798418045044, "logps/chosen": -466.1717834472656, "logps/rejected": -644.224609375, "loss": 0.0152, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2248157560825348, "rewards/margins": 0.14126001298427582, "rewards/rejected": -0.36607569456100464, "step": 1750 }, { "epoch": 0.23, "learning_rate": 3.268630667594348e-06, "logits/chosen": -1.7461265325546265, "logits/rejected": -1.5468791723251343, "logps/chosen": -402.60931396484375, "logps/rejected": -446.5025329589844, "loss": 0.017, "rewards/accuracies": 0.625, "rewards/chosen": -0.17790451645851135, "rewards/margins": 0.08323220163583755, "rewards/rejected": -0.2611367106437683, "step": 1760 }, { "epoch": 0.23, "learning_rate": 3.2468607696883147e-06, "logits/chosen": -1.9368324279785156, "logits/rejected": -1.7913051843643188, "logps/chosen": -495.4078063964844, "logps/rejected": -530.65380859375, "loss": 0.0173, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1627500355243683, "rewards/margins": 0.0602637343108654, "rewards/rejected": -0.2230137586593628, "step": 1770 }, { "epoch": 0.23, "learning_rate": 3.225028509122944e-06, "logits/chosen": -1.7925491333007812, "logits/rejected": -1.5885066986083984, "logps/chosen": -358.14117431640625, "logps/rejected": -418.9410705566406, "loss": 0.0193, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1533903181552887, "rewards/margins": 0.08258415013551712, "rewards/rejected": -0.23597446084022522, "step": 1780 }, { "epoch": 0.23, "learning_rate": 3.2031357088857083e-06, "logits/chosen": -1.9862604141235352, "logits/rejected": -1.8626024723052979, "logps/chosen": -370.5836181640625, "logps/rejected": -453.05279541015625, "loss": 0.0229, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12010282278060913, "rewards/margins": 0.1101105585694313, "rewards/rejected": -0.23021335899829865, "step": 1790 }, { "epoch": 0.24, "learning_rate": 3.181184197019127e-06, "logits/chosen": -1.8817790746688843, "logits/rejected": -1.5228415727615356, "logps/chosen": -425.82012939453125, "logps/rejected": -517.078125, "loss": 0.0162, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14074283838272095, "rewards/margins": 0.12722983956336975, "rewards/rejected": -0.2679726481437683, "step": 1800 }, { "epoch": 0.24, "learning_rate": 3.159175806468126e-06, "logits/chosen": -1.658769965171814, "logits/rejected": -1.7822647094726562, "logps/chosen": -308.4710998535156, "logps/rejected": -461.7257385253906, "loss": 0.019, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12070232629776001, "rewards/margins": 0.11304762214422226, "rewards/rejected": -0.23374994099140167, "step": 1810 }, { "epoch": 0.24, "learning_rate": 3.1371123749269804e-06, "logits/chosen": -1.9757215976715088, "logits/rejected": -1.7527239322662354, "logps/chosen": -421.7149353027344, "logps/rejected": -433.16619873046875, "loss": 0.026, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12986886501312256, "rewards/margins": 0.0475795678794384, "rewards/rejected": -0.17744843661785126, "step": 1820 }, { "epoch": 0.24, "learning_rate": 3.114995744685877e-06, "logits/chosen": -1.863130807876587, "logits/rejected": -1.6808397769927979, "logps/chosen": -440.72149658203125, "logps/rejected": -507.3966369628906, "loss": 0.0164, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1266050785779953, "rewards/margins": 0.10441949218511581, "rewards/rejected": -0.2310245782136917, "step": 1830 }, { "epoch": 0.24, "learning_rate": 3.0928277624770743e-06, "logits/chosen": -1.9252994060516357, "logits/rejected": -1.855322241783142, "logps/chosen": -365.15240478515625, "logps/rejected": -486.9214782714844, "loss": 0.0224, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1379556953907013, "rewards/margins": 0.07425601780414581, "rewards/rejected": -0.2122117280960083, "step": 1840 }, { "epoch": 0.24, "learning_rate": 3.070610279320708e-06, "logits/chosen": -1.8004119396209717, "logits/rejected": -1.7161788940429688, "logps/chosen": -408.49566650390625, "logps/rejected": -475.2015075683594, "loss": 0.0184, "rewards/accuracies": 0.75, "rewards/chosen": -0.12649108469486237, "rewards/margins": 0.11580338329076767, "rewards/rejected": -0.24229446053504944, "step": 1850 }, { "epoch": 0.24, "learning_rate": 3.0483451503702264e-06, "logits/chosen": -1.7138731479644775, "logits/rejected": -1.4477074146270752, "logps/chosen": -474.27764892578125, "logps/rejected": -598.2100830078125, "loss": 0.0231, "rewards/accuracies": 0.75, "rewards/chosen": -0.18683870136737823, "rewards/margins": 0.12090454250574112, "rewards/rejected": -0.30774325132369995, "step": 1860 }, { "epoch": 0.24, "learning_rate": 3.0260342347574916e-06, "logits/chosen": -1.735713005065918, "logits/rejected": -1.648543357849121, "logps/chosen": -369.6907043457031, "logps/rejected": -470.18096923828125, "loss": 0.0191, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1600438505411148, "rewards/margins": 0.10410650819540024, "rewards/rejected": -0.26415038108825684, "step": 1870 }, { "epoch": 0.25, "learning_rate": 3.0036793954375358e-06, "logits/chosen": -1.7015645503997803, "logits/rejected": -1.2797722816467285, "logps/chosen": -427.6006774902344, "logps/rejected": -526.3659057617188, "loss": 0.0226, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1544523686170578, "rewards/margins": 0.11350264400243759, "rewards/rejected": -0.2679550051689148, "step": 1880 }, { "epoch": 0.25, "learning_rate": 2.981282499033009e-06, "logits/chosen": -1.7517515420913696, "logits/rejected": -1.6246592998504639, "logps/chosen": -397.01226806640625, "logps/rejected": -464.5995178222656, "loss": 0.027, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15207110345363617, "rewards/margins": 0.07603247463703156, "rewards/rejected": -0.22810356318950653, "step": 1890 }, { "epoch": 0.25, "learning_rate": 2.9588454156783163e-06, "logits/chosen": -2.0521302223205566, "logits/rejected": -1.645867109298706, "logps/chosen": -515.1359252929688, "logps/rejected": -567.1170654296875, "loss": 0.017, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15798985958099365, "rewards/margins": 0.11956565082073212, "rewards/rejected": -0.27755552530288696, "step": 1900 }, { "epoch": 0.25, "learning_rate": 2.9363700188634597e-06, "logits/chosen": -1.6246519088745117, "logits/rejected": -1.469555139541626, "logps/chosen": -351.7187194824219, "logps/rejected": -459.64813232421875, "loss": 0.0265, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.157011941075325, "rewards/margins": 0.10143647342920303, "rewards/rejected": -0.25844839215278625, "step": 1910 }, { "epoch": 0.25, "learning_rate": 2.9138581852776053e-06, "logits/chosen": -1.6571727991104126, "logits/rejected": -1.7033582925796509, "logps/chosen": -300.0462951660156, "logps/rejected": -414.4212341308594, "loss": 0.0108, "rewards/accuracies": 0.625, "rewards/chosen": -0.12796807289123535, "rewards/margins": 0.07252534478902817, "rewards/rejected": -0.20049342513084412, "step": 1920 }, { "epoch": 0.25, "learning_rate": 2.8913117946523805e-06, "logits/chosen": -1.5375624895095825, "logits/rejected": -1.6519877910614014, "logps/chosen": -419.7509765625, "logps/rejected": -656.6526489257812, "loss": 0.022, "rewards/accuracies": 0.625, "rewards/chosen": -0.16821017861366272, "rewards/margins": 0.11995784193277359, "rewards/rejected": -0.2881679832935333, "step": 1930 }, { "epoch": 0.25, "learning_rate": 2.8687327296049126e-06, "logits/chosen": -1.7078078985214233, "logits/rejected": -1.6371896266937256, "logps/chosen": -450.70928955078125, "logps/rejected": -559.0304565429688, "loss": 0.0135, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18661805987358093, "rewards/margins": 0.06663697212934494, "rewards/rejected": -0.25325506925582886, "step": 1940 }, { "epoch": 0.26, "learning_rate": 2.8461228754806376e-06, "logits/chosen": -1.6565535068511963, "logits/rejected": -1.5362708568572998, "logps/chosen": -427.30804443359375, "logps/rejected": -519.9610595703125, "loss": 0.0301, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1637393683195114, "rewards/margins": 0.10776355117559433, "rewards/rejected": -0.27150291204452515, "step": 1950 }, { "epoch": 0.26, "learning_rate": 2.823484120195865e-06, "logits/chosen": -1.9046138525009155, "logits/rejected": -1.7616183757781982, "logps/chosen": -405.6791076660156, "logps/rejected": -410.101806640625, "loss": 0.0136, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13973860442638397, "rewards/margins": 0.04140274599194527, "rewards/rejected": -0.18114134669303894, "step": 1960 }, { "epoch": 0.26, "learning_rate": 2.8008183540801486e-06, "logits/chosen": -1.7911622524261475, "logits/rejected": -1.692138671875, "logps/chosen": -330.9847106933594, "logps/rejected": -449.517822265625, "loss": 0.0135, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13471712172031403, "rewards/margins": 0.09083884954452515, "rewards/rejected": -0.22555597126483917, "step": 1970 }, { "epoch": 0.26, "learning_rate": 2.7781274697184353e-06, "logits/chosen": -1.9190704822540283, "logits/rejected": -1.5853004455566406, "logps/chosen": -437.56884765625, "logps/rejected": -489.0252380371094, "loss": 0.0206, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14917708933353424, "rewards/margins": 0.0892854705452919, "rewards/rejected": -0.23846253752708435, "step": 1980 }, { "epoch": 0.26, "learning_rate": 2.7554133617930397e-06, "logits/chosen": -1.685828447341919, "logits/rejected": -1.6816571950912476, "logps/chosen": -421.8097229003906, "logps/rejected": -497.8553771972656, "loss": 0.0168, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14757490158081055, "rewards/margins": 0.07652349770069122, "rewards/rejected": -0.22409839928150177, "step": 1990 }, { "epoch": 0.26, "learning_rate": 2.7326779269254363e-06, "logits/chosen": -1.6492477655410767, "logits/rejected": -1.5454237461090088, "logps/chosen": -401.2170715332031, "logps/rejected": -452.13287353515625, "loss": 0.023, "rewards/accuracies": 0.75, "rewards/chosen": -0.15250924229621887, "rewards/margins": 0.10589434206485748, "rewards/rejected": -0.25840359926223755, "step": 2000 }, { "epoch": 0.26, "learning_rate": 2.7099230635178954e-06, "logits/chosen": -1.6588656902313232, "logits/rejected": -1.6458364725112915, "logps/chosen": -360.05450439453125, "logps/rejected": -479.4027404785156, "loss": 0.0384, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14435285329818726, "rewards/margins": 0.10231070220470428, "rewards/rejected": -0.24666354060173035, "step": 2010 }, { "epoch": 0.26, "learning_rate": 2.6871506715949608e-06, "logits/chosen": -1.7712013721466064, "logits/rejected": -1.700981855392456, "logps/chosen": -438.7752990722656, "logps/rejected": -561.3502197265625, "loss": 0.0288, "rewards/accuracies": 0.625, "rewards/chosen": -0.14679552614688873, "rewards/margins": 0.11427030712366104, "rewards/rejected": -0.261065810918808, "step": 2020 }, { "epoch": 0.27, "learning_rate": 2.6643626526448063e-06, "logits/chosen": -1.796282172203064, "logits/rejected": -1.8202321529388428, "logps/chosen": -391.6895446777344, "logps/rejected": -539.5911865234375, "loss": 0.0184, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14048923552036285, "rewards/margins": 0.12442316859960556, "rewards/rejected": -0.2649123966693878, "step": 2030 }, { "epoch": 0.27, "learning_rate": 2.6415609094604562e-06, "logits/chosen": -1.8824316263198853, "logits/rejected": -1.8531659841537476, "logps/chosen": -367.3789367675781, "logps/rejected": -548.4909057617188, "loss": 0.0203, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14026226103305817, "rewards/margins": 0.14373354613780975, "rewards/rejected": -0.2839958071708679, "step": 2040 }, { "epoch": 0.27, "learning_rate": 2.618747345980904e-06, "logits/chosen": -1.8885694742202759, "logits/rejected": -1.444990634918213, "logps/chosen": -383.72686767578125, "logps/rejected": -440.8082580566406, "loss": 0.0238, "rewards/accuracies": 0.625, "rewards/chosen": -0.16496935486793518, "rewards/margins": 0.10502351820468903, "rewards/rejected": -0.2699928879737854, "step": 2050 }, { "epoch": 0.27, "learning_rate": 2.595923867132136e-06, "logits/chosen": -1.7916829586029053, "logits/rejected": -1.559659481048584, "logps/chosen": -484.2021484375, "logps/rejected": -493.0411682128906, "loss": 0.0232, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16716818511486053, "rewards/margins": 0.08854760229587555, "rewards/rejected": -0.2557157874107361, "step": 2060 }, { "epoch": 0.27, "learning_rate": 2.5730923786680672e-06, "logits/chosen": -2.000430107116699, "logits/rejected": -1.641470193862915, "logps/chosen": -462.4923400878906, "logps/rejected": -510.4591369628906, "loss": 0.017, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12160061299800873, "rewards/margins": 0.08607065677642822, "rewards/rejected": -0.20767128467559814, "step": 2070 }, { "epoch": 0.27, "learning_rate": 2.5502547870114137e-06, "logits/chosen": -1.7373088598251343, "logits/rejected": -1.5884270668029785, "logps/chosen": -354.4035949707031, "logps/rejected": -510.2303771972656, "loss": 0.0154, "rewards/accuracies": 0.625, "rewards/chosen": -0.13180679082870483, "rewards/margins": 0.1380440592765808, "rewards/rejected": -0.26985087990760803, "step": 2080 }, { "epoch": 0.27, "learning_rate": 2.527412999094507e-06, "logits/chosen": -1.909131407737732, "logits/rejected": -1.703380823135376, "logps/chosen": -365.89312744140625, "logps/rejected": -436.53411865234375, "loss": 0.0191, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10623818635940552, "rewards/margins": 0.10509941726922989, "rewards/rejected": -0.2113375961780548, "step": 2090 }, { "epoch": 0.27, "learning_rate": 2.504568922200064e-06, "logits/chosen": -1.524195909500122, "logits/rejected": -1.5863279104232788, "logps/chosen": -386.9281921386719, "logps/rejected": -466.0390625, "loss": 0.0231, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13788601756095886, "rewards/margins": 0.07915763556957245, "rewards/rejected": -0.2170436829328537, "step": 2100 }, { "epoch": 0.28, "learning_rate": 2.4817244638019333e-06, "logits/chosen": -1.733839988708496, "logits/rejected": -1.5340702533721924, "logps/chosen": -339.8945617675781, "logps/rejected": -570.2664794921875, "loss": 0.0134, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14478543400764465, "rewards/margins": 0.1958717703819275, "rewards/rejected": -0.34065723419189453, "step": 2110 }, { "epoch": 0.28, "learning_rate": 2.4588815314058155e-06, "logits/chosen": -1.9334630966186523, "logits/rejected": -1.6082617044448853, "logps/chosen": -441.1907653808594, "logps/rejected": -562.14794921875, "loss": 0.0129, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17673881351947784, "rewards/margins": 0.10720287263393402, "rewards/rejected": -0.28394168615341187, "step": 2120 }, { "epoch": 0.28, "learning_rate": 2.4360420323899922e-06, "logits/chosen": -1.8241560459136963, "logits/rejected": -1.5457789897918701, "logps/chosen": -414.11212158203125, "logps/rejected": -439.07318115234375, "loss": 0.0148, "rewards/accuracies": 0.625, "rewards/chosen": -0.14023596048355103, "rewards/margins": 0.07062707841396332, "rewards/rejected": -0.21086303889751434, "step": 2130 }, { "epoch": 0.28, "learning_rate": 2.4132078738460585e-06, "logits/chosen": -1.7795307636260986, "logits/rejected": -1.693245530128479, "logps/chosen": -413.4581604003906, "logps/rejected": -489.2635803222656, "loss": 0.0245, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16430504620075226, "rewards/margins": 0.08753269165754318, "rewards/rejected": -0.25183773040771484, "step": 2140 }, { "epoch": 0.28, "learning_rate": 2.3903809624196826e-06, "logits/chosen": -1.7313525676727295, "logits/rejected": -1.7246475219726562, "logps/chosen": -446.88446044921875, "logps/rejected": -514.286865234375, "loss": 0.0136, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1518130749464035, "rewards/margins": 0.07510828971862793, "rewards/rejected": -0.22692136466503143, "step": 2150 }, { "epoch": 0.28, "learning_rate": 2.3675632041513978e-06, "logits/chosen": -2.0084481239318848, "logits/rejected": -1.8913724422454834, "logps/chosen": -369.7825927734375, "logps/rejected": -447.08367919921875, "loss": 0.0124, "rewards/accuracies": 0.625, "rewards/chosen": -0.14405110478401184, "rewards/margins": 0.07157470285892487, "rewards/rejected": -0.2156258076429367, "step": 2160 }, { "epoch": 0.28, "learning_rate": 2.3447565043174533e-06, "logits/chosen": -1.8906570672988892, "logits/rejected": -1.639074683189392, "logps/chosen": -393.3006591796875, "logps/rejected": -477.375, "loss": 0.0136, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13008955121040344, "rewards/margins": 0.09611732512712479, "rewards/rejected": -0.22620686888694763, "step": 2170 }, { "epoch": 0.29, "learning_rate": 2.321962767270724e-06, "logits/chosen": -1.9170795679092407, "logits/rejected": -1.834633231163025, "logps/chosen": -357.61151123046875, "logps/rejected": -522.9044799804688, "loss": 0.0192, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14305247366428375, "rewards/margins": 0.1302427351474762, "rewards/rejected": -0.27329522371292114, "step": 2180 }, { "epoch": 0.29, "learning_rate": 2.299183896281692e-06, "logits/chosen": -1.5739772319793701, "logits/rejected": -1.4046179056167603, "logps/chosen": -402.8276062011719, "logps/rejected": -533.167236328125, "loss": 0.0446, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1857595443725586, "rewards/margins": 0.09556765854358673, "rewards/rejected": -0.2813272178173065, "step": 2190 }, { "epoch": 0.29, "learning_rate": 2.2764217933795297e-06, "logits/chosen": -1.7968279123306274, "logits/rejected": -1.6996835470199585, "logps/chosen": -383.11468505859375, "logps/rejected": -503.56536865234375, "loss": 0.0125, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12219767272472382, "rewards/margins": 0.11918073892593384, "rewards/rejected": -0.24137838184833527, "step": 2200 }, { "epoch": 0.29, "learning_rate": 2.2536783591932786e-06, "logits/chosen": -1.7252916097640991, "logits/rejected": -1.660698652267456, "logps/chosen": -412.865966796875, "logps/rejected": -560.65869140625, "loss": 0.0209, "rewards/accuracies": 0.625, "rewards/chosen": -0.12717315554618835, "rewards/margins": 0.0997866615653038, "rewards/rejected": -0.22695982456207275, "step": 2210 }, { "epoch": 0.29, "learning_rate": 2.230955492793149e-06, "logits/chosen": -1.8447004556655884, "logits/rejected": -1.7087070941925049, "logps/chosen": -398.0675964355469, "logps/rejected": -497.8534240722656, "loss": 0.0179, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11124436557292938, "rewards/margins": 0.08409640192985535, "rewards/rejected": -0.19534078240394592, "step": 2220 }, { "epoch": 0.29, "learning_rate": 2.208255091531947e-06, "logits/chosen": -2.020773410797119, "logits/rejected": -1.7263826131820679, "logps/chosen": -380.26385498046875, "logps/rejected": -417.6468200683594, "loss": 0.0104, "rewards/accuracies": 0.75, "rewards/chosen": -0.09765110909938812, "rewards/margins": 0.06216695159673691, "rewards/rejected": -0.15981806814670563, "step": 2230 }, { "epoch": 0.29, "learning_rate": 2.1855790508866435e-06, "logits/chosen": -1.7512352466583252, "logits/rejected": -1.6903069019317627, "logps/chosen": -350.62408447265625, "logps/rejected": -508.8916931152344, "loss": 0.0224, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1171707957983017, "rewards/margins": 0.10106335580348969, "rewards/rejected": -0.21823418140411377, "step": 2240 }, { "epoch": 0.29, "learning_rate": 2.162929264300107e-06, "logits/chosen": -2.004178524017334, "logits/rejected": -2.0492522716522217, "logps/chosen": -375.3388671875, "logps/rejected": -452.77337646484375, "loss": 0.0123, "rewards/accuracies": 0.625, "rewards/chosen": -0.08443252742290497, "rewards/margins": 0.08801615983247757, "rewards/rejected": -0.17244866490364075, "step": 2250 }, { "epoch": 0.3, "learning_rate": 2.1403076230230006e-06, "logits/chosen": -1.9816453456878662, "logits/rejected": -1.7716138362884521, "logps/chosen": -376.86065673828125, "logps/rejected": -444.36614990234375, "loss": 0.0211, "rewards/accuracies": 0.625, "rewards/chosen": -0.11069681495428085, "rewards/margins": 0.0960860550403595, "rewards/rejected": -0.20678286254405975, "step": 2260 }, { "epoch": 0.3, "learning_rate": 2.11771601595586e-06, "logits/chosen": -1.7995827198028564, "logits/rejected": -1.5612187385559082, "logps/chosen": -312.52899169921875, "logps/rejected": -399.647216796875, "loss": 0.0098, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10375572741031647, "rewards/margins": 0.09660674631595612, "rewards/rejected": -0.20036247372627258, "step": 2270 }, { "epoch": 0.3, "learning_rate": 2.0951563294913737e-06, "logits/chosen": -1.7093846797943115, "logits/rejected": -1.6065731048583984, "logps/chosen": -376.09454345703125, "logps/rejected": -477.0457458496094, "loss": 0.0136, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12729713320732117, "rewards/margins": 0.10373828560113907, "rewards/rejected": -0.23103542625904083, "step": 2280 }, { "epoch": 0.3, "learning_rate": 2.0726304473568693e-06, "logits/chosen": -1.915750503540039, "logits/rejected": -1.7321512699127197, "logps/chosen": -425.9007873535156, "logps/rejected": -513.4124145507812, "loss": 0.0182, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12515000998973846, "rewards/margins": 0.1002768725156784, "rewards/rejected": -0.22542688250541687, "step": 2290 }, { "epoch": 0.3, "learning_rate": 2.050140250457023e-06, "logits/chosen": -1.7121286392211914, "logits/rejected": -1.5676960945129395, "logps/chosen": -419.02667236328125, "logps/rejected": -464.94921875, "loss": 0.0202, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12538568675518036, "rewards/margins": 0.11476848274469376, "rewards/rejected": -0.2401541769504547, "step": 2300 }, { "epoch": 0.3, "learning_rate": 2.0276876167168042e-06, "logits/chosen": -1.599696159362793, "logits/rejected": -1.684889554977417, "logps/chosen": -276.92926025390625, "logps/rejected": -393.26995849609375, "loss": 0.0215, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0761098638176918, "rewards/margins": 0.0935591608285904, "rewards/rejected": -0.1696690320968628, "step": 2310 }, { "epoch": 0.3, "learning_rate": 2.0052744209246682e-06, "logits/chosen": -1.8279746770858765, "logits/rejected": -1.5182549953460693, "logps/chosen": -363.34478759765625, "logps/rejected": -387.684326171875, "loss": 0.0179, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0703197568655014, "rewards/margins": 0.07770083844661713, "rewards/rejected": -0.14802059531211853, "step": 2320 }, { "epoch": 0.3, "learning_rate": 1.9829025345760127e-06, "logits/chosen": -1.8807859420776367, "logits/rejected": -1.748761773109436, "logps/chosen": -441.8787536621094, "logps/rejected": -481.5179748535156, "loss": 0.025, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1366555392742157, "rewards/margins": 0.07487355917692184, "rewards/rejected": -0.21152910590171814, "step": 2330 }, { "epoch": 0.31, "learning_rate": 1.9605738257169115e-06, "logits/chosen": -1.8558143377304077, "logits/rejected": -1.602513313293457, "logps/chosen": -357.6425476074219, "logps/rejected": -474.4366760253906, "loss": 0.0303, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.132442906498909, "rewards/margins": 0.1250801384449005, "rewards/rejected": -0.2575230598449707, "step": 2340 }, { "epoch": 0.31, "learning_rate": 1.9382901587881275e-06, "logits/chosen": -1.7687349319458008, "logits/rejected": -1.6479012966156006, "logps/chosen": -406.641845703125, "logps/rejected": -518.3619995117188, "loss": 0.0113, "rewards/accuracies": 0.625, "rewards/chosen": -0.14297707378864288, "rewards/margins": 0.08667118847370148, "rewards/rejected": -0.22964826226234436, "step": 2350 }, { "epoch": 0.31, "learning_rate": 1.916053394469437e-06, "logits/chosen": -1.8459150791168213, "logits/rejected": -1.5883492231369019, "logps/chosen": -381.8975830078125, "logps/rejected": -463.957763671875, "loss": 0.0156, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11032159626483917, "rewards/margins": 0.12916973233222961, "rewards/rejected": -0.2394913136959076, "step": 2360 }, { "epoch": 0.31, "learning_rate": 1.8938653895242604e-06, "logits/chosen": -2.071506977081299, "logits/rejected": -1.7145764827728271, "logps/chosen": -374.2837219238281, "logps/rejected": -434.7330017089844, "loss": 0.0155, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1002299040555954, "rewards/margins": 0.08921743184328079, "rewards/rejected": -0.1894473284482956, "step": 2370 }, { "epoch": 0.31, "learning_rate": 1.8717279966446267e-06, "logits/chosen": -1.7913320064544678, "logits/rejected": -1.734678864479065, "logps/chosen": -397.92230224609375, "logps/rejected": -491.8880920410156, "loss": 0.0224, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10940172523260117, "rewards/margins": 0.11103262007236481, "rewards/rejected": -0.22043435275554657, "step": 2380 }, { "epoch": 0.31, "learning_rate": 1.8496430642964698e-06, "logits/chosen": -1.8505489826202393, "logits/rejected": -1.7398958206176758, "logps/chosen": -376.2174987792969, "logps/rejected": -455.051513671875, "loss": 0.0109, "rewards/accuracies": 0.75, "rewards/chosen": -0.10867688804864883, "rewards/margins": 0.10923395305871964, "rewards/rejected": -0.21791084110736847, "step": 2390 }, { "epoch": 0.31, "learning_rate": 1.827612436565286e-06, "logits/chosen": -1.9409513473510742, "logits/rejected": -1.5838344097137451, "logps/chosen": -453.20538330078125, "logps/rejected": -396.49310302734375, "loss": 0.0265, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1574598252773285, "rewards/margins": 0.037888940423727036, "rewards/rejected": -0.19534876942634583, "step": 2400 }, { "epoch": 0.32, "learning_rate": 1.8056379530021492e-06, "logits/chosen": -1.7505874633789062, "logits/rejected": -1.6526845693588257, "logps/chosen": -435.98199462890625, "logps/rejected": -546.981689453125, "loss": 0.0257, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1385519951581955, "rewards/margins": 0.14368018507957458, "rewards/rejected": -0.2822321355342865, "step": 2410 }, { "epoch": 0.32, "learning_rate": 1.7837214484701154e-06, "logits/chosen": -1.9044792652130127, "logits/rejected": -1.7781755924224854, "logps/chosen": -341.1239013671875, "logps/rejected": -405.9642333984375, "loss": 0.018, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13462628424167633, "rewards/margins": 0.06428463757038116, "rewards/rejected": -0.1989109367132187, "step": 2420 }, { "epoch": 0.32, "learning_rate": 1.7618647529910043e-06, "logits/chosen": -1.7790517807006836, "logits/rejected": -1.6076533794403076, "logps/chosen": -332.4115295410156, "logps/rejected": -485.473876953125, "loss": 0.0122, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13369640707969666, "rewards/margins": 0.10562696307897568, "rewards/rejected": -0.23932337760925293, "step": 2430 }, { "epoch": 0.32, "learning_rate": 1.7400696915925996e-06, "logits/chosen": -1.8593626022338867, "logits/rejected": -1.6796321868896484, "logps/chosen": -410.10247802734375, "logps/rejected": -581.8309936523438, "loss": 0.025, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11563781648874283, "rewards/margins": 0.14415881037712097, "rewards/rejected": -0.2597966492176056, "step": 2440 }, { "epoch": 0.32, "learning_rate": 1.718338084156254e-06, "logits/chosen": -1.9622865915298462, "logits/rejected": -1.7303345203399658, "logps/chosen": -379.3187561035156, "logps/rejected": -522.2726440429688, "loss": 0.0199, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1560114622116089, "rewards/margins": 0.1418474167585373, "rewards/rejected": -0.297858864068985, "step": 2450 }, { "epoch": 0.32, "learning_rate": 1.6966717452649372e-06, "logits/chosen": -1.8643362522125244, "logits/rejected": -1.5710781812667847, "logps/chosen": -372.6382141113281, "logps/rejected": -513.40966796875, "loss": 0.0132, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09803296625614166, "rewards/margins": 0.15759465098381042, "rewards/rejected": -0.2556275725364685, "step": 2460 }, { "epoch": 0.32, "learning_rate": 1.6750724840517103e-06, "logits/chosen": -1.9030168056488037, "logits/rejected": -1.643489122390747, "logps/chosen": -411.04541015625, "logps/rejected": -524.4387817382812, "loss": 0.0133, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11378389596939087, "rewards/margins": 0.12280420958995819, "rewards/rejected": -0.23658815026283264, "step": 2470 }, { "epoch": 0.32, "learning_rate": 1.6535421040486686e-06, "logits/chosen": -2.0007407665252686, "logits/rejected": -1.853777527809143, "logps/chosen": -376.6355895996094, "logps/rejected": -490.8793029785156, "loss": 0.0181, "rewards/accuracies": 0.75, "rewards/chosen": -0.1196683794260025, "rewards/margins": 0.12484125792980194, "rewards/rejected": -0.24450962245464325, "step": 2480 }, { "epoch": 0.33, "learning_rate": 1.6320824030363458e-06, "logits/chosen": -1.8327369689941406, "logits/rejected": -1.6966917514801025, "logps/chosen": -368.7300720214844, "logps/rejected": -440.08917236328125, "loss": 0.0122, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1055096983909607, "rewards/margins": 0.09476348012685776, "rewards/rejected": -0.20027318596839905, "step": 2490 }, { "epoch": 0.33, "learning_rate": 1.6106951728936028e-06, "logits/chosen": -1.9331190586090088, "logits/rejected": -1.6993480920791626, "logps/chosen": -412.096923828125, "logps/rejected": -511.1239318847656, "loss": 0.0124, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14224201440811157, "rewards/margins": 0.11230548471212387, "rewards/rejected": -0.25454747676849365, "step": 2500 }, { "epoch": 0.33, "learning_rate": 1.5893821994479996e-06, "logits/chosen": -1.8661571741104126, "logits/rejected": -1.6067588329315186, "logps/chosen": -483.56951904296875, "logps/rejected": -595.2945556640625, "loss": 0.0194, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13534314930438995, "rewards/margins": 0.10849038511514664, "rewards/rejected": -0.2438335120677948, "step": 2510 }, { "epoch": 0.33, "learning_rate": 1.5681452623266868e-06, "logits/chosen": -1.8298006057739258, "logits/rejected": -1.7637277841567993, "logps/chosen": -371.1351318359375, "logps/rejected": -444.2303161621094, "loss": 0.0269, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13254711031913757, "rewards/margins": 0.05975104495882988, "rewards/rejected": -0.19229814410209656, "step": 2520 }, { "epoch": 0.33, "learning_rate": 1.5469861348078014e-06, "logits/chosen": -1.9488359689712524, "logits/rejected": -1.7855498790740967, "logps/chosen": -342.57171630859375, "logps/rejected": -413.1770935058594, "loss": 0.0254, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1030026450753212, "rewards/margins": 0.07978326082229614, "rewards/rejected": -0.18278591334819794, "step": 2530 }, { "epoch": 0.33, "learning_rate": 1.5259065836724035e-06, "logits/chosen": -1.9040677547454834, "logits/rejected": -1.6895372867584229, "logps/chosen": -380.28863525390625, "logps/rejected": -483.16131591796875, "loss": 0.0188, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11208023130893707, "rewards/margins": 0.10833732783794403, "rewards/rejected": -0.2204175740480423, "step": 2540 }, { "epoch": 0.33, "learning_rate": 1.5049083690569456e-06, "logits/chosen": -1.890223503112793, "logits/rejected": -1.798566222190857, "logps/chosen": -408.01788330078125, "logps/rejected": -459.48687744140625, "loss": 0.0158, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12085233628749847, "rewards/margins": 0.06504372507333755, "rewards/rejected": -0.18589606881141663, "step": 2550 }, { "epoch": 0.33, "learning_rate": 1.4839932443063057e-06, "logits/chosen": -1.8459657430648804, "logits/rejected": -1.6654268503189087, "logps/chosen": -373.38531494140625, "logps/rejected": -460.004150390625, "loss": 0.0259, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12574592232704163, "rewards/margins": 0.10881473869085312, "rewards/rejected": -0.23456065356731415, "step": 2560 }, { "epoch": 0.34, "learning_rate": 1.4631629558273803e-06, "logits/chosen": -1.8869895935058594, "logits/rejected": -1.6317555904388428, "logps/chosen": -323.6122131347656, "logps/rejected": -445.6595764160156, "loss": 0.018, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10208660364151001, "rewards/margins": 0.12573343515396118, "rewards/rejected": -0.2278200387954712, "step": 2570 }, { "epoch": 0.34, "learning_rate": 1.4424192429432657e-06, "logits/chosen": -1.6856921911239624, "logits/rejected": -1.7072092294692993, "logps/chosen": -356.3609924316406, "logps/rejected": -509.64013671875, "loss": 0.0243, "rewards/accuracies": 0.625, "rewards/chosen": -0.14678624272346497, "rewards/margins": 0.09828634560108185, "rewards/rejected": -0.24507257342338562, "step": 2580 }, { "epoch": 0.34, "learning_rate": 1.421763837748016e-06, "logits/chosen": -1.9052375555038452, "logits/rejected": -1.522080659866333, "logps/chosen": -468.0479431152344, "logps/rejected": -586.0888061523438, "loss": 0.0152, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15239429473876953, "rewards/margins": 0.13636548817157745, "rewards/rejected": -0.2887597680091858, "step": 2590 }, { "epoch": 0.34, "learning_rate": 1.401198464962021e-06, "logits/chosen": -1.8970810174942017, "logits/rejected": -1.7944128513336182, "logps/chosen": -366.1616516113281, "logps/rejected": -475.0137634277344, "loss": 0.0141, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12462279945611954, "rewards/margins": 0.0947902649641037, "rewards/rejected": -0.21941304206848145, "step": 2600 }, { "epoch": 0.34, "learning_rate": 1.3807248417879896e-06, "logits/chosen": -1.9154447317123413, "logits/rejected": -1.8471952676773071, "logps/chosen": -371.0704650878906, "logps/rejected": -513.1985473632812, "loss": 0.0177, "rewards/accuracies": 0.625, "rewards/chosen": -0.11407079547643661, "rewards/margins": 0.13133589923381805, "rewards/rejected": -0.24540670216083527, "step": 2610 }, { "epoch": 0.34, "learning_rate": 1.3603446777675665e-06, "logits/chosen": -1.9292303323745728, "logits/rejected": -1.7285076379776, "logps/chosen": -363.197265625, "logps/rejected": -423.7437438964844, "loss": 0.011, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08268821239471436, "rewards/margins": 0.09848850965499878, "rewards/rejected": -0.18117670714855194, "step": 2620 }, { "epoch": 0.34, "learning_rate": 1.3400596746385817e-06, "logits/chosen": -1.8151944875717163, "logits/rejected": -1.4447648525238037, "logps/chosen": -421.9136657714844, "logps/rejected": -522.0579223632812, "loss": 0.0331, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13248854875564575, "rewards/margins": 0.1346489042043686, "rewards/rejected": -0.26713743805885315, "step": 2630 }, { "epoch": 0.35, "learning_rate": 1.3198715261929587e-06, "logits/chosen": -1.9300651550292969, "logits/rejected": -1.81232488155365, "logps/chosen": -384.4336853027344, "logps/rejected": -440.50152587890625, "loss": 0.0298, "rewards/accuracies": 0.5, "rewards/chosen": -0.1345733255147934, "rewards/margins": 0.08391193300485611, "rewards/rejected": -0.2184852659702301, "step": 2640 }, { "epoch": 0.35, "learning_rate": 1.2997819181352823e-06, "logits/chosen": -1.8099353313446045, "logits/rejected": -1.8255798816680908, "logps/chosen": -435.4271545410156, "logps/rejected": -550.7764282226562, "loss": 0.0138, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11611498892307281, "rewards/margins": 0.0985056534409523, "rewards/rejected": -0.21462063491344452, "step": 2650 }, { "epoch": 0.35, "learning_rate": 1.2797925279420454e-06, "logits/chosen": -1.6378068923950195, "logits/rejected": -1.5208961963653564, "logps/chosen": -419.12872314453125, "logps/rejected": -436.3223571777344, "loss": 0.0189, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12371046841144562, "rewards/margins": 0.054706841707229614, "rewards/rejected": -0.17841729521751404, "step": 2660 }, { "epoch": 0.35, "learning_rate": 1.2599050247215764e-06, "logits/chosen": -1.8828243017196655, "logits/rejected": -1.5881667137145996, "logps/chosen": -485.49237060546875, "logps/rejected": -516.7525024414062, "loss": 0.0116, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1648116111755371, "rewards/margins": 0.09406300634145737, "rewards/rejected": -0.2588745951652527, "step": 2670 }, { "epoch": 0.35, "learning_rate": 1.2401210690746705e-06, "logits/chosen": -1.8691139221191406, "logits/rejected": -1.6119076013565063, "logps/chosen": -379.3757629394531, "logps/rejected": -471.1551208496094, "loss": 0.0144, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10782364755868912, "rewards/margins": 0.11954446136951447, "rewards/rejected": -0.22736811637878418, "step": 2680 }, { "epoch": 0.35, "learning_rate": 1.2204423129559306e-06, "logits/chosen": -2.0099003314971924, "logits/rejected": -1.8623501062393188, "logps/chosen": -331.4324951171875, "logps/rejected": -408.6713562011719, "loss": 0.0177, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10050249099731445, "rewards/margins": 0.09280785173177719, "rewards/rejected": -0.19331035017967224, "step": 2690 }, { "epoch": 0.35, "learning_rate": 1.20087039953583e-06, "logits/chosen": -1.7159931659698486, "logits/rejected": -1.6651582717895508, "logps/chosen": -297.18865966796875, "logps/rejected": -391.3750915527344, "loss": 0.0175, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10160907357931137, "rewards/margins": 0.09469465911388397, "rewards/rejected": -0.19630375504493713, "step": 2700 }, { "epoch": 0.35, "learning_rate": 1.181406963063507e-06, "logits/chosen": -1.9415171146392822, "logits/rejected": -1.6620985269546509, "logps/chosen": -400.4481506347656, "logps/rejected": -427.08966064453125, "loss": 0.0207, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.148293137550354, "rewards/margins": 0.0670941099524498, "rewards/rejected": -0.2153872549533844, "step": 2710 }, { "epoch": 0.36, "learning_rate": 1.1620536287303052e-06, "logits/chosen": -1.8330198526382446, "logits/rejected": -1.6826404333114624, "logps/chosen": -368.10906982421875, "logps/rejected": -468.1707458496094, "loss": 0.0154, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12084785848855972, "rewards/margins": 0.08665207028388977, "rewards/rejected": -0.2074999362230301, "step": 2720 }, { "epoch": 0.36, "learning_rate": 1.1428120125340717e-06, "logits/chosen": -1.8878908157348633, "logits/rejected": -1.6593071222305298, "logps/chosen": -360.3643493652344, "logps/rejected": -491.3680725097656, "loss": 0.0266, "rewards/accuracies": 0.625, "rewards/chosen": -0.12878648936748505, "rewards/margins": 0.10249900817871094, "rewards/rejected": -0.23128552734851837, "step": 2730 }, { "epoch": 0.36, "learning_rate": 1.123683721144223e-06, "logits/chosen": -1.7851661443710327, "logits/rejected": -1.512587070465088, "logps/chosen": -439.3026428222656, "logps/rejected": -454.8172912597656, "loss": 0.0246, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14959672093391418, "rewards/margins": 0.0757288932800293, "rewards/rejected": -0.22532562911510468, "step": 2740 }, { "epoch": 0.36, "learning_rate": 1.1046703517675848e-06, "logits/chosen": -1.8425315618515015, "logits/rejected": -1.7289304733276367, "logps/chosen": -383.91796875, "logps/rejected": -565.6708374023438, "loss": 0.0208, "rewards/accuracies": 0.75, "rewards/chosen": -0.15151607990264893, "rewards/margins": 0.15814267098903656, "rewards/rejected": -0.3096587061882019, "step": 2750 }, { "epoch": 0.36, "learning_rate": 1.085773492015028e-06, "logits/chosen": -1.8818790912628174, "logits/rejected": -1.6095691919326782, "logps/chosen": -414.6910705566406, "logps/rejected": -506.7699279785156, "loss": 0.0175, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1356070637702942, "rewards/margins": 0.11843924224376678, "rewards/rejected": -0.2540462911128998, "step": 2760 }, { "epoch": 0.36, "learning_rate": 1.0669947197689034e-06, "logits/chosen": -1.736973524093628, "logits/rejected": -1.568878412246704, "logps/chosen": -327.4220275878906, "logps/rejected": -403.37982177734375, "loss": 0.0282, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10845674574375153, "rewards/margins": 0.0882064551115036, "rewards/rejected": -0.19666320085525513, "step": 2770 }, { "epoch": 0.36, "learning_rate": 1.048335603051291e-06, "logits/chosen": -1.7906758785247803, "logits/rejected": -1.5921446084976196, "logps/chosen": -317.88641357421875, "logps/rejected": -316.4330139160156, "loss": 0.0222, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.08554825931787491, "rewards/margins": 0.06365128606557846, "rewards/rejected": -0.14919956028461456, "step": 2780 }, { "epoch": 0.37, "learning_rate": 1.0297976998930665e-06, "logits/chosen": -1.959350347518921, "logits/rejected": -1.7278425693511963, "logps/chosen": -343.7808532714844, "logps/rejected": -388.09368896484375, "loss": 0.0268, "rewards/accuracies": 0.5, "rewards/chosen": -0.1059531569480896, "rewards/margins": 0.056411731988191605, "rewards/rejected": -0.1623648852109909, "step": 2790 }, { "epoch": 0.37, "learning_rate": 1.0113825582038078e-06, "logits/chosen": -1.7807786464691162, "logits/rejected": -1.7756898403167725, "logps/chosen": -443.08453369140625, "logps/rejected": -538.6343994140625, "loss": 0.0163, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12049416452646255, "rewards/margins": 0.07103614509105682, "rewards/rejected": -0.19153030216693878, "step": 2800 }, { "epoch": 0.37, "learning_rate": 9.930917156425477e-07, "logits/chosen": -1.8056347370147705, "logits/rejected": -1.8034446239471436, "logps/chosen": -432.5520935058594, "logps/rejected": -492.48046875, "loss": 0.0154, "rewards/accuracies": 0.625, "rewards/chosen": -0.1606009155511856, "rewards/margins": 0.05354217812418938, "rewards/rejected": -0.2141430824995041, "step": 2810 }, { "epoch": 0.37, "learning_rate": 9.749266994893756e-07, "logits/chosen": -1.8276878595352173, "logits/rejected": -1.8232624530792236, "logps/chosen": -391.04827880859375, "logps/rejected": -523.3101806640625, "loss": 0.0186, "rewards/accuracies": 0.625, "rewards/chosen": -0.12191896140575409, "rewards/margins": 0.08811606466770172, "rewards/rejected": -0.210035040974617, "step": 2820 }, { "epoch": 0.37, "learning_rate": 9.56889026517913e-07, "logits/chosen": -1.727163553237915, "logits/rejected": -1.6771562099456787, "logps/chosen": -364.46258544921875, "logps/rejected": -498.42669677734375, "loss": 0.0152, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12093399465084076, "rewards/margins": 0.1221398338675499, "rewards/rejected": -0.24307382106781006, "step": 2830 }, { "epoch": 0.37, "learning_rate": 9.389802028686617e-07, "logits/chosen": -1.7865333557128906, "logits/rejected": -1.9088605642318726, "logps/chosen": -333.36761474609375, "logps/rejected": -452.6796875, "loss": 0.0195, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0991756021976471, "rewards/margins": 0.08042697608470917, "rewards/rejected": -0.17960259318351746, "step": 2840 }, { "epoch": 0.37, "learning_rate": 9.212017239232427e-07, "logits/chosen": -1.6909167766571045, "logits/rejected": -1.5613714456558228, "logps/chosen": -355.0484619140625, "logps/rejected": -500.7080078125, "loss": 0.0153, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13768619298934937, "rewards/margins": 0.14242741465568542, "rewards/rejected": -0.2801136374473572, "step": 2850 }, { "epoch": 0.37, "learning_rate": 9.03555074179533e-07, "logits/chosen": -1.675554633140564, "logits/rejected": -1.6777839660644531, "logps/chosen": -331.7530822753906, "logps/rejected": -419.63385009765625, "loss": 0.0158, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10129085928201675, "rewards/margins": 0.07592518627643585, "rewards/rejected": -0.17721602320671082, "step": 2860 }, { "epoch": 0.38, "learning_rate": 8.860417271277067e-07, "logits/chosen": -1.7569414377212524, "logits/rejected": -1.7091529369354248, "logps/chosen": -322.36627197265625, "logps/rejected": -478.31915283203125, "loss": 0.0141, "rewards/accuracies": 0.75, "rewards/chosen": -0.13447651267051697, "rewards/margins": 0.13838770985603333, "rewards/rejected": -0.2728641927242279, "step": 2870 }, { "epoch": 0.38, "learning_rate": 8.686631451272029e-07, "logits/chosen": -1.7315187454223633, "logits/rejected": -1.7625491619110107, "logps/chosen": -365.6579895019531, "logps/rejected": -451.216796875, "loss": 0.0293, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12155483663082123, "rewards/margins": 0.08006703108549118, "rewards/rejected": -0.20162184536457062, "step": 2880 }, { "epoch": 0.38, "learning_rate": 8.514207792846168e-07, "logits/chosen": -1.7942087650299072, "logits/rejected": -1.828169822692871, "logps/chosen": -333.1575622558594, "logps/rejected": -421.3460998535156, "loss": 0.0187, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11341162770986557, "rewards/margins": 0.07552970945835114, "rewards/rejected": -0.1889413297176361, "step": 2890 }, { "epoch": 0.38, "learning_rate": 8.343160693325356e-07, "logits/chosen": -1.713867425918579, "logits/rejected": -1.6187788248062134, "logps/chosen": -283.31732177734375, "logps/rejected": -507.02276611328125, "loss": 0.0276, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11991555988788605, "rewards/margins": 0.1559232473373413, "rewards/rejected": -0.27583879232406616, "step": 2900 }, { "epoch": 0.38, "learning_rate": 8.173504435093174e-07, "logits/chosen": -1.9715394973754883, "logits/rejected": -1.694689393043518, "logps/chosen": -413.58782958984375, "logps/rejected": -476.80804443359375, "loss": 0.02, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10615275055170059, "rewards/margins": 0.08019173890352249, "rewards/rejected": -0.18634448945522308, "step": 2910 }, { "epoch": 0.38, "learning_rate": 8.00525318439836e-07, "logits/chosen": -1.6772304773330688, "logits/rejected": -1.6047683954238892, "logps/chosen": -352.08709716796875, "logps/rejected": -447.92071533203125, "loss": 0.0179, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13414040207862854, "rewards/margins": 0.07959143072366714, "rewards/rejected": -0.21373188495635986, "step": 2920 }, { "epoch": 0.38, "learning_rate": 7.838420990171927e-07, "logits/chosen": -1.6803100109100342, "logits/rejected": -1.6617472171783447, "logps/chosen": -369.3343505859375, "logps/rejected": -558.9995727539062, "loss": 0.0131, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1545887589454651, "rewards/margins": 0.1316053569316864, "rewards/rejected": -0.2861941456794739, "step": 2930 }, { "epoch": 0.38, "learning_rate": 7.673021782854084e-07, "logits/chosen": -2.0515646934509277, "logits/rejected": -1.7345861196517944, "logps/chosen": -409.7217712402344, "logps/rejected": -454.6595764160156, "loss": 0.0177, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12300684303045273, "rewards/margins": 0.09628129005432129, "rewards/rejected": -0.21928814053535461, "step": 2940 }, { "epoch": 0.39, "learning_rate": 7.509069373231039e-07, "logits/chosen": -1.766739845275879, "logits/rejected": -1.6829363107681274, "logps/chosen": -403.56158447265625, "logps/rejected": -526.936767578125, "loss": 0.0158, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1515023559331894, "rewards/margins": 0.09100815653800964, "rewards/rejected": -0.24251051247119904, "step": 2950 }, { "epoch": 0.39, "learning_rate": 7.346577451281822e-07, "logits/chosen": -1.8228447437286377, "logits/rejected": -1.559351921081543, "logps/chosen": -461.4525451660156, "logps/rejected": -593.1720581054688, "loss": 0.0088, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15342538058757782, "rewards/margins": 0.14591960608959198, "rewards/rejected": -0.2993450164794922, "step": 2960 }, { "epoch": 0.39, "learning_rate": 7.185559585035138e-07, "logits/chosen": -1.9145300388336182, "logits/rejected": -1.8285773992538452, "logps/chosen": -437.1171875, "logps/rejected": -516.6611328125, "loss": 0.0095, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16908249258995056, "rewards/margins": 0.06570545583963394, "rewards/rejected": -0.2347879409790039, "step": 2970 }, { "epoch": 0.39, "learning_rate": 7.026029219436504e-07, "logits/chosen": -1.8295743465423584, "logits/rejected": -1.5706146955490112, "logps/chosen": -415.28021240234375, "logps/rejected": -580.3078002929688, "loss": 0.0167, "rewards/accuracies": 0.875, "rewards/chosen": -0.14067871868610382, "rewards/margins": 0.1622161567211151, "rewards/rejected": -0.3028948903083801, "step": 2980 }, { "epoch": 0.39, "learning_rate": 6.867999675225523e-07, "logits/chosen": -1.8819026947021484, "logits/rejected": -1.5036561489105225, "logps/chosen": -438.0957946777344, "logps/rejected": -489.4017639160156, "loss": 0.0084, "rewards/accuracies": 0.75, "rewards/chosen": -0.14029823243618011, "rewards/margins": 0.07762910425662994, "rewards/rejected": -0.21792733669281006, "step": 2990 }, { "epoch": 0.39, "learning_rate": 6.711484147823663e-07, "logits/chosen": -1.8335106372833252, "logits/rejected": -1.9043381214141846, "logps/chosen": -282.0878601074219, "logps/rejected": -428.53009033203125, "loss": 0.0143, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0992058515548706, "rewards/margins": 0.10786984115839005, "rewards/rejected": -0.20707568526268005, "step": 3000 }, { "epoch": 0.39, "learning_rate": 6.556495706232413e-07, "logits/chosen": -1.7493526935577393, "logits/rejected": -1.6328833103179932, "logps/chosen": -335.4507141113281, "logps/rejected": -474.9689025878906, "loss": 0.0206, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09414876252412796, "rewards/margins": 0.11405216157436371, "rewards/rejected": -0.20820090174674988, "step": 3010 }, { "epoch": 0.4, "learning_rate": 6.403047291942057e-07, "logits/chosen": -1.7894386053085327, "logits/rejected": -1.7089000940322876, "logps/chosen": -359.0506896972656, "logps/rejected": -541.7208862304688, "loss": 0.0166, "rewards/accuracies": 0.75, "rewards/chosen": -0.13221842050552368, "rewards/margins": 0.13505801558494568, "rewards/rejected": -0.26727643609046936, "step": 3020 }, { "epoch": 0.4, "learning_rate": 6.251151717851023e-07, "logits/chosen": -1.7384182214736938, "logits/rejected": -1.8190691471099854, "logps/chosen": -417.1878356933594, "logps/rejected": -487.277587890625, "loss": 0.0158, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1196901947259903, "rewards/margins": 0.0724378153681755, "rewards/rejected": -0.1921280175447464, "step": 3030 }, { "epoch": 0.4, "learning_rate": 6.100821667196041e-07, "logits/chosen": -1.935723066329956, "logits/rejected": -1.761724829673767, "logps/chosen": -385.0484313964844, "logps/rejected": -478.97149658203125, "loss": 0.0204, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.147886723279953, "rewards/margins": 0.09008549898862839, "rewards/rejected": -0.237972229719162, "step": 3040 }, { "epoch": 0.4, "learning_rate": 5.952069692493062e-07, "logits/chosen": -1.771364450454712, "logits/rejected": -1.7364155054092407, "logps/chosen": -396.44659423828125, "logps/rejected": -555.443359375, "loss": 0.0262, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15717598795890808, "rewards/margins": 0.12896773219108582, "rewards/rejected": -0.2861437499523163, "step": 3050 }, { "epoch": 0.4, "learning_rate": 5.80490821448918e-07, "logits/chosen": -1.6604974269866943, "logits/rejected": -1.4811383485794067, "logps/chosen": -304.014892578125, "logps/rejected": -394.8099365234375, "loss": 0.0238, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11539293825626373, "rewards/margins": 0.09489092230796814, "rewards/rejected": -0.21028387546539307, "step": 3060 }, { "epoch": 0.4, "learning_rate": 5.659349521125459e-07, "logits/chosen": -1.6314293146133423, "logits/rejected": -1.7141329050064087, "logps/chosen": -299.427490234375, "logps/rejected": -535.4639282226562, "loss": 0.0192, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11823688447475433, "rewards/margins": 0.15163354575634003, "rewards/rejected": -0.26987043023109436, "step": 3070 }, { "epoch": 0.4, "learning_rate": 5.5154057665109e-07, "logits/chosen": -1.7767751216888428, "logits/rejected": -1.5554602146148682, "logps/chosen": -419.2239685058594, "logps/rejected": -515.3742065429688, "loss": 0.0174, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1414458453655243, "rewards/margins": 0.11291448026895523, "rewards/rejected": -0.2543603479862213, "step": 3080 }, { "epoch": 0.4, "learning_rate": 5.373088969907586e-07, "logits/chosen": -1.7762525081634521, "logits/rejected": -1.5618431568145752, "logps/chosen": -340.00653076171875, "logps/rejected": -409.5877380371094, "loss": 0.023, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12462613731622696, "rewards/margins": 0.09855198860168457, "rewards/rejected": -0.22317814826965332, "step": 3090 }, { "epoch": 0.41, "learning_rate": 5.23241101472709e-07, "logits/chosen": -1.6521999835968018, "logits/rejected": -1.4713876247406006, "logps/chosen": -415.926513671875, "logps/rejected": -501.5210876464844, "loss": 0.0226, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14230573177337646, "rewards/margins": 0.08339878916740417, "rewards/rejected": -0.22570452094078064, "step": 3100 }, { "epoch": 0.41, "learning_rate": 5.09338364753818e-07, "logits/chosen": -1.9044616222381592, "logits/rejected": -1.8478124141693115, "logps/chosen": -307.904052734375, "logps/rejected": -430.0193786621094, "loss": 0.0117, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09876067191362381, "rewards/margins": 0.07787071913480759, "rewards/rejected": -0.1766313910484314, "step": 3110 }, { "epoch": 0.41, "learning_rate": 4.956018477086005e-07, "logits/chosen": -1.9966977834701538, "logits/rejected": -1.6211084127426147, "logps/chosen": -390.1811218261719, "logps/rejected": -467.33892822265625, "loss": 0.0194, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11299258470535278, "rewards/margins": 0.10159225761890411, "rewards/rejected": -0.2145848274230957, "step": 3120 }, { "epoch": 0.41, "learning_rate": 4.820326973322764e-07, "logits/chosen": -1.8979175090789795, "logits/rejected": -1.7858377695083618, "logps/chosen": -406.2095642089844, "logps/rejected": -467.02801513671875, "loss": 0.0191, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12170519679784775, "rewards/margins": 0.10835738480091095, "rewards/rejected": -0.2300625741481781, "step": 3130 }, { "epoch": 0.41, "learning_rate": 4.686320466449981e-07, "logits/chosen": -1.5622245073318481, "logits/rejected": -1.5973408222198486, "logps/chosen": -358.85968017578125, "logps/rejected": -529.4793701171875, "loss": 0.0188, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11333116143941879, "rewards/margins": 0.13866858184337616, "rewards/rejected": -0.25199976563453674, "step": 3140 }, { "epoch": 0.41, "learning_rate": 4.554010145972418e-07, "logits/chosen": -1.9318832159042358, "logits/rejected": -1.5084483623504639, "logps/chosen": -408.9183654785156, "logps/rejected": -441.175537109375, "loss": 0.0113, "rewards/accuracies": 0.625, "rewards/chosen": -0.1270248144865036, "rewards/margins": 0.08558224141597748, "rewards/rejected": -0.21260705590248108, "step": 3150 }, { "epoch": 0.41, "learning_rate": 4.4234070597637455e-07, "logits/chosen": -1.9005769491195679, "logits/rejected": -1.6583656072616577, "logps/chosen": -485.7540588378906, "logps/rejected": -552.005126953125, "loss": 0.0204, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15117983520030975, "rewards/margins": 0.08778969198465347, "rewards/rejected": -0.23896953463554382, "step": 3160 }, { "epoch": 0.41, "learning_rate": 4.2945221131440783e-07, "logits/chosen": -1.9350112676620483, "logits/rejected": -1.7964184284210205, "logps/chosen": -377.2402038574219, "logps/rejected": -495.5428771972656, "loss": 0.0237, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1443541944026947, "rewards/margins": 0.09460046887397766, "rewards/rejected": -0.23895466327667236, "step": 3170 }, { "epoch": 0.42, "learning_rate": 4.167366067969381e-07, "logits/chosen": -1.8541675806045532, "logits/rejected": -1.7513071298599243, "logps/chosen": -398.96832275390625, "logps/rejected": -470.5838928222656, "loss": 0.0103, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14500777423381805, "rewards/margins": 0.06419918686151505, "rewards/rejected": -0.2092069685459137, "step": 3180 }, { "epoch": 0.42, "learning_rate": 4.041949541732826e-07, "logits/chosen": -1.75282883644104, "logits/rejected": -1.791329026222229, "logps/chosen": -339.1605224609375, "logps/rejected": -475.42376708984375, "loss": 0.0149, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12593409419059753, "rewards/margins": 0.12885800004005432, "rewards/rejected": -0.2547920346260071, "step": 3190 }, { "epoch": 0.42, "learning_rate": 3.9182830066782614e-07, "logits/chosen": -2.016516923904419, "logits/rejected": -1.7938131093978882, "logps/chosen": -380.2807922363281, "logps/rejected": -481.1507263183594, "loss": 0.0221, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11394486576318741, "rewards/margins": 0.09651365131139755, "rewards/rejected": -0.21045854687690735, "step": 3200 }, { "epoch": 0.42, "learning_rate": 3.796376788925771e-07, "logits/chosen": -1.923266053199768, "logits/rejected": -1.65493905544281, "logps/chosen": -397.8783874511719, "logps/rejected": -565.4571533203125, "loss": 0.0301, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1346154808998108, "rewards/margins": 0.13186214864253998, "rewards/rejected": -0.26647764444351196, "step": 3210 }, { "epoch": 0.42, "learning_rate": 3.676241067609465e-07, "logits/chosen": -1.7783008813858032, "logits/rejected": -1.571351408958435, "logps/chosen": -385.0386962890625, "logps/rejected": -432.5997009277344, "loss": 0.0177, "rewards/accuracies": 0.75, "rewards/chosen": -0.1197885274887085, "rewards/margins": 0.0978497713804245, "rewards/rejected": -0.217638298869133, "step": 3220 }, { "epoch": 0.42, "learning_rate": 3.5578858740274976e-07, "logits/chosen": -1.9327704906463623, "logits/rejected": -1.7286369800567627, "logps/chosen": -511.8605041503906, "logps/rejected": -628.949951171875, "loss": 0.0202, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16340863704681396, "rewards/margins": 0.1368248164653778, "rewards/rejected": -0.3002334237098694, "step": 3230 }, { "epoch": 0.42, "learning_rate": 3.44132109080447e-07, "logits/chosen": -1.8676338195800781, "logits/rejected": -1.4100158214569092, "logps/chosen": -396.5127868652344, "logps/rejected": -422.2503967285156, "loss": 0.0174, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1372300088405609, "rewards/margins": 0.0994165688753128, "rewards/rejected": -0.23664656281471252, "step": 3240 }, { "epoch": 0.43, "learning_rate": 3.3265564510662344e-07, "logits/chosen": -1.8984111547470093, "logits/rejected": -1.7924525737762451, "logps/chosen": -383.17901611328125, "logps/rejected": -433.2188415527344, "loss": 0.0142, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.12849511206150055, "rewards/margins": 0.061784617602825165, "rewards/rejected": -0.19027972221374512, "step": 3250 }, { "epoch": 0.43, "learning_rate": 3.213601537627195e-07, "logits/chosen": -1.757118821144104, "logits/rejected": -1.6811769008636475, "logps/chosen": -361.39141845703125, "logps/rejected": -485.991455078125, "loss": 0.0307, "rewards/accuracies": 0.625, "rewards/chosen": -0.12168528139591217, "rewards/margins": 0.08949202299118042, "rewards/rejected": -0.2111773043870926, "step": 3260 }, { "epoch": 0.43, "learning_rate": 3.1024657821901063e-07, "logits/chosen": -1.8983631134033203, "logits/rejected": -1.9102423191070557, "logps/chosen": -284.61767578125, "logps/rejected": -352.45831298828125, "loss": 0.0238, "rewards/accuracies": 0.5, "rewards/chosen": -0.09829960018396378, "rewards/margins": 0.04633808881044388, "rewards/rejected": -0.14463767409324646, "step": 3270 }, { "epoch": 0.43, "learning_rate": 2.9931584645585654e-07, "logits/chosen": -1.9065412282943726, "logits/rejected": -1.7483928203582764, "logps/chosen": -363.5090026855469, "logps/rejected": -471.3272399902344, "loss": 0.0184, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10694222152233124, "rewards/margins": 0.10477688163518906, "rewards/rejected": -0.2117191106081009, "step": 3280 }, { "epoch": 0.43, "learning_rate": 2.885688711862136e-07, "logits/chosen": -1.895806908607483, "logits/rejected": -1.5531339645385742, "logps/chosen": -408.56329345703125, "logps/rejected": -447.0934143066406, "loss": 0.02, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13035434484481812, "rewards/margins": 0.08635638654232025, "rewards/rejected": -0.21671073138713837, "step": 3290 }, { "epoch": 0.43, "learning_rate": 2.7800654977942486e-07, "logits/chosen": -1.7909250259399414, "logits/rejected": -1.5084540843963623, "logps/chosen": -378.9937744140625, "logps/rejected": -472.8016662597656, "loss": 0.0283, "rewards/accuracies": 0.75, "rewards/chosen": -0.10055921971797943, "rewards/margins": 0.11087435483932495, "rewards/rejected": -0.2114335596561432, "step": 3300 }, { "epoch": 0.43, "learning_rate": 2.6762976418628797e-07, "logits/chosen": -1.8816821575164795, "logits/rejected": -1.5647997856140137, "logps/chosen": -403.9762268066406, "logps/rejected": -399.05816650390625, "loss": 0.0339, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1094084233045578, "rewards/margins": 0.07436503469944, "rewards/rejected": -0.1837734431028366, "step": 3310 }, { "epoch": 0.43, "learning_rate": 2.5743938086541354e-07, "logits/chosen": -1.929932951927185, "logits/rejected": -1.7042264938354492, "logps/chosen": -425.7578125, "logps/rejected": -532.0509033203125, "loss": 0.0136, "rewards/accuracies": 0.625, "rewards/chosen": -0.13902431726455688, "rewards/margins": 0.11742977797985077, "rewards/rejected": -0.25645411014556885, "step": 3320 }, { "epoch": 0.44, "learning_rate": 2.4743625071087574e-07, "logits/chosen": -1.7875635623931885, "logits/rejected": -1.7216848134994507, "logps/chosen": -284.62835693359375, "logps/rejected": -367.90679931640625, "loss": 0.0113, "rewards/accuracies": 0.625, "rewards/chosen": -0.09288414567708969, "rewards/margins": 0.09416206181049347, "rewards/rejected": -0.18704620003700256, "step": 3330 }, { "epoch": 0.44, "learning_rate": 2.3762120898116498e-07, "logits/chosen": -1.629289984703064, "logits/rejected": -1.6548233032226562, "logps/chosen": -294.29058837890625, "logps/rejected": -432.2232360839844, "loss": 0.0144, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1142941489815712, "rewards/margins": 0.09768979251384735, "rewards/rejected": -0.21198394894599915, "step": 3340 }, { "epoch": 0.44, "learning_rate": 2.2799507522944048e-07, "logits/chosen": -1.912879228591919, "logits/rejected": -1.856755018234253, "logps/chosen": -386.7931823730469, "logps/rejected": -438.4544982910156, "loss": 0.0144, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1067216545343399, "rewards/margins": 0.07812465727329254, "rewards/rejected": -0.18484631180763245, "step": 3350 }, { "epoch": 0.44, "learning_rate": 2.1855865323510056e-07, "logits/chosen": -1.9285023212432861, "logits/rejected": -1.586591362953186, "logps/chosen": -357.08367919921875, "logps/rejected": -449.99700927734375, "loss": 0.0182, "rewards/accuracies": 0.75, "rewards/chosen": -0.12121524661779404, "rewards/margins": 0.10305871069431305, "rewards/rejected": -0.2242739498615265, "step": 3360 }, { "epoch": 0.44, "learning_rate": 2.0931273093666575e-07, "logits/chosen": -2.0450992584228516, "logits/rejected": -1.6013189554214478, "logps/chosen": -453.60479736328125, "logps/rejected": -434.035888671875, "loss": 0.0123, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1065659299492836, "rewards/margins": 0.07594268023967743, "rewards/rejected": -0.18250861763954163, "step": 3370 }, { "epoch": 0.44, "learning_rate": 2.002580803659873e-07, "logits/chosen": -2.0260674953460693, "logits/rejected": -1.6672608852386475, "logps/chosen": -433.1702575683594, "logps/rejected": -402.4795227050781, "loss": 0.0234, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1336720883846283, "rewards/margins": 0.04400887340307236, "rewards/rejected": -0.17768093943595886, "step": 3380 }, { "epoch": 0.44, "learning_rate": 1.913954575837826e-07, "logits/chosen": -1.8990083932876587, "logits/rejected": -1.5992904901504517, "logps/chosen": -409.26617431640625, "logps/rejected": -470.896484375, "loss": 0.0157, "rewards/accuracies": 0.625, "rewards/chosen": -0.15049056708812714, "rewards/margins": 0.07415696978569031, "rewards/rejected": -0.22464752197265625, "step": 3390 }, { "epoch": 0.44, "learning_rate": 1.827256026165028e-07, "logits/chosen": -1.7308130264282227, "logits/rejected": -1.5803091526031494, "logps/chosen": -370.61767578125, "logps/rejected": -498.44512939453125, "loss": 0.018, "rewards/accuracies": 0.75, "rewards/chosen": -0.10960374027490616, "rewards/margins": 0.10495195537805557, "rewards/rejected": -0.21455569565296173, "step": 3400 }, { "epoch": 0.45, "learning_rate": 1.7424923939454274e-07, "logits/chosen": -1.8837168216705322, "logits/rejected": -1.7200605869293213, "logps/chosen": -388.86077880859375, "logps/rejected": -482.8601989746094, "loss": 0.0143, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11280441284179688, "rewards/margins": 0.09194304794073105, "rewards/rejected": -0.20474748313426971, "step": 3410 }, { "epoch": 0.45, "learning_rate": 1.6596707569179304e-07, "logits/chosen": -1.7294692993164062, "logits/rejected": -1.4970271587371826, "logps/chosen": -378.0173034667969, "logps/rejected": -475.9228515625, "loss": 0.0212, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1412251591682434, "rewards/margins": 0.11580245196819305, "rewards/rejected": -0.25702759623527527, "step": 3420 }, { "epoch": 0.45, "learning_rate": 1.578798030665385e-07, "logits/chosen": -1.9886093139648438, "logits/rejected": -1.7199186086654663, "logps/chosen": -385.4482116699219, "logps/rejected": -466.707275390625, "loss": 0.0201, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10231782495975494, "rewards/margins": 0.10437797009944916, "rewards/rejected": -0.2066957950592041, "step": 3430 }, { "epoch": 0.45, "learning_rate": 1.499880968037165e-07, "logits/chosen": -1.5503489971160889, "logits/rejected": -1.5826743841171265, "logps/chosen": -348.7884826660156, "logps/rejected": -447.23974609375, "loss": 0.0159, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11516599357128143, "rewards/margins": 0.0892045721411705, "rewards/rejected": -0.20437054336071014, "step": 3440 }, { "epoch": 0.45, "learning_rate": 1.4229261585852805e-07, "logits/chosen": -1.931419014930725, "logits/rejected": -1.727283239364624, "logps/chosen": -416.19866943359375, "logps/rejected": -475.95672607421875, "loss": 0.0294, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1294177621603012, "rewards/margins": 0.08598669618368149, "rewards/rejected": -0.2154044657945633, "step": 3450 }, { "epoch": 0.45, "learning_rate": 1.3479400280141886e-07, "logits/chosen": -1.9694263935089111, "logits/rejected": -1.655242919921875, "logps/chosen": -345.41656494140625, "logps/rejected": -407.39813232421875, "loss": 0.016, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1186901330947876, "rewards/margins": 0.0832347422838211, "rewards/rejected": -0.2019248753786087, "step": 3460 }, { "epoch": 0.45, "learning_rate": 1.2749288376442044e-07, "logits/chosen": -1.9172016382217407, "logits/rejected": -1.8104664087295532, "logps/chosen": -351.1995849609375, "logps/rejected": -388.0035705566406, "loss": 0.0218, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09934596717357635, "rewards/margins": 0.07780973613262177, "rewards/rejected": -0.17715568840503693, "step": 3470 }, { "epoch": 0.46, "learning_rate": 1.203898683888713e-07, "logits/chosen": -2.0304388999938965, "logits/rejected": -1.8785274028778076, "logps/chosen": -338.3299255371094, "logps/rejected": -378.8971252441406, "loss": 0.0316, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0862874686717987, "rewards/margins": 0.03513091057538986, "rewards/rejected": -0.12141837924718857, "step": 3480 }, { "epoch": 0.46, "learning_rate": 1.1348554977451132e-07, "logits/chosen": -2.007988929748535, "logits/rejected": -1.889350175857544, "logps/chosen": -417.0150451660156, "logps/rejected": -442.656494140625, "loss": 0.021, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12762945890426636, "rewards/margins": 0.06958001852035522, "rewards/rejected": -0.19720949232578278, "step": 3490 }, { "epoch": 0.46, "learning_rate": 1.0678050442995802e-07, "logits/chosen": -1.9844154119491577, "logits/rejected": -1.805830717086792, "logps/chosen": -408.1218566894531, "logps/rejected": -476.0577697753906, "loss": 0.0204, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12938465178012848, "rewards/margins": 0.09463149309158325, "rewards/rejected": -0.22401615977287292, "step": 3500 }, { "epoch": 0.46, "learning_rate": 1.0027529222456755e-07, "logits/chosen": -1.7113529443740845, "logits/rejected": -1.5692651271820068, "logps/chosen": -373.86602783203125, "logps/rejected": -457.77386474609375, "loss": 0.0153, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13313065469264984, "rewards/margins": 0.06109083443880081, "rewards/rejected": -0.19422151148319244, "step": 3510 }, { "epoch": 0.46, "learning_rate": 9.397045634168766e-08, "logits/chosen": -1.9188206195831299, "logits/rejected": -1.7597535848617554, "logps/chosen": -381.99639892578125, "logps/rejected": -473.8807678222656, "loss": 0.0197, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11674102395772934, "rewards/margins": 0.07928375899791718, "rewards/rejected": -0.19602477550506592, "step": 3520 }, { "epoch": 0.46, "learning_rate": 8.78665232332998e-08, "logits/chosen": -1.7811170816421509, "logits/rejected": -1.606605887413025, "logps/chosen": -393.5836486816406, "logps/rejected": -456.54736328125, "loss": 0.0209, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11409644037485123, "rewards/margins": 0.08625979721546173, "rewards/rejected": -0.20035624504089355, "step": 3530 }, { "epoch": 0.46, "learning_rate": 8.196400257606208e-08, "logits/chosen": -1.8289616107940674, "logits/rejected": -1.755653977394104, "logps/chosen": -347.4233703613281, "logps/rejected": -435.627685546875, "loss": 0.0179, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15368905663490295, "rewards/margins": 0.10434701293706894, "rewards/rejected": -0.2580360770225525, "step": 3540 }, { "epoch": 0.46, "learning_rate": 7.626338722875076e-08, "logits/chosen": -1.4682605266571045, "logits/rejected": -1.4346805810928345, "logps/chosen": -299.78424072265625, "logps/rejected": -426.435302734375, "loss": 0.0196, "rewards/accuracies": 0.625, "rewards/chosen": -0.10566931962966919, "rewards/margins": 0.1149459257721901, "rewards/rejected": -0.2206152379512787, "step": 3550 }, { "epoch": 0.47, "learning_rate": 7.076515319110688e-08, "logits/chosen": -1.8838917016983032, "logits/rejected": -1.8772399425506592, "logps/chosen": -329.28253173828125, "logps/rejected": -395.58428955078125, "loss": 0.0162, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.13901695609092712, "rewards/margins": 0.04220137000083923, "rewards/rejected": -0.18121832609176636, "step": 3560 }, { "epoch": 0.47, "learning_rate": 6.54697595640899e-08, "logits/chosen": -1.6363271474838257, "logits/rejected": -1.449350118637085, "logps/chosen": -372.8322448730469, "logps/rejected": -448.37127685546875, "loss": 0.0131, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12390259653329849, "rewards/margins": 0.10821553319692612, "rewards/rejected": -0.2321181297302246, "step": 3570 }, { "epoch": 0.47, "learning_rate": 6.037764851154426e-08, "logits/chosen": -1.8637882471084595, "logits/rejected": -1.6043150424957275, "logps/chosen": -350.90240478515625, "logps/rejected": -457.96514892578125, "loss": 0.0199, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12807050347328186, "rewards/margins": 0.11550267785787582, "rewards/rejected": -0.24357318878173828, "step": 3580 }, { "epoch": 0.47, "learning_rate": 5.548924522327748e-08, "logits/chosen": -1.8330078125, "logits/rejected": -1.6513811349868774, "logps/chosen": -443.91815185546875, "logps/rejected": -531.4874877929688, "loss": 0.0265, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1490592509508133, "rewards/margins": 0.09423011541366577, "rewards/rejected": -0.24328935146331787, "step": 3590 }, { "epoch": 0.47, "learning_rate": 5.0804957879556915e-08, "logits/chosen": -1.7766271829605103, "logits/rejected": -1.578848958015442, "logps/chosen": -347.375244140625, "logps/rejected": -373.5815124511719, "loss": 0.028, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14153003692626953, "rewards/margins": 0.06694964319467545, "rewards/rejected": -0.20847967267036438, "step": 3600 }, { "epoch": 0.47, "learning_rate": 4.632517761702815e-08, "logits/chosen": -1.8813426494598389, "logits/rejected": -1.7908045053482056, "logps/chosen": -331.4591979980469, "logps/rejected": -446.927734375, "loss": 0.0181, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.107657790184021, "rewards/margins": 0.11776338517665863, "rewards/rejected": -0.22542119026184082, "step": 3610 }, { "epoch": 0.47, "learning_rate": 4.205027849605359e-08, "logits/chosen": -1.8991270065307617, "logits/rejected": -1.6750634908676147, "logps/chosen": -413.450927734375, "logps/rejected": -477.51708984375, "loss": 0.0124, "rewards/accuracies": 0.75, "rewards/chosen": -0.1038760095834732, "rewards/margins": 0.11153750121593475, "rewards/rejected": -0.21541352570056915, "step": 3620 }, { "epoch": 0.48, "learning_rate": 3.798061746947995e-08, "logits/chosen": -2.0600037574768066, "logits/rejected": -1.8774570226669312, "logps/chosen": -356.8966979980469, "logps/rejected": -437.280029296875, "loss": 0.0227, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09808599948883057, "rewards/margins": 0.08815163373947144, "rewards/rejected": -0.1862376481294632, "step": 3630 }, { "epoch": 0.48, "learning_rate": 3.411653435283158e-08, "logits/chosen": -1.8597673177719116, "logits/rejected": -1.6338917016983032, "logps/chosen": -326.682861328125, "logps/rejected": -386.28173828125, "loss": 0.0228, "rewards/accuracies": 0.625, "rewards/chosen": -0.10577195882797241, "rewards/margins": 0.09608887135982513, "rewards/rejected": -0.20186083018779755, "step": 3640 }, { "epoch": 0.48, "learning_rate": 3.04583517959367e-08, "logits/chosen": -1.9883283376693726, "logits/rejected": -1.755710244178772, "logps/chosen": -437.05322265625, "logps/rejected": -488.8763732910156, "loss": 0.0145, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13006719946861267, "rewards/margins": 0.08393072336912155, "rewards/rejected": -0.21399791538715363, "step": 3650 }, { "epoch": 0.48, "learning_rate": 2.7006375255985984e-08, "logits/chosen": -1.8725805282592773, "logits/rejected": -1.894209861755371, "logps/chosen": -413.03582763671875, "logps/rejected": -572.1802978515625, "loss": 0.0383, "rewards/accuracies": 0.625, "rewards/chosen": -0.1521502286195755, "rewards/margins": 0.08880196511745453, "rewards/rejected": -0.24095220863819122, "step": 3660 }, { "epoch": 0.48, "learning_rate": 2.3760892972027328e-08, "logits/chosen": -1.9661140441894531, "logits/rejected": -1.5976365804672241, "logps/chosen": -392.95977783203125, "logps/rejected": -497.61029052734375, "loss": 0.0188, "rewards/accuracies": 0.625, "rewards/chosen": -0.1288428008556366, "rewards/margins": 0.11072500050067902, "rewards/rejected": -0.23956778645515442, "step": 3670 }, { "epoch": 0.48, "learning_rate": 2.072217594089765e-08, "logits/chosen": -1.82770574092865, "logits/rejected": -1.731122612953186, "logps/chosen": -405.0982360839844, "logps/rejected": -481.58984375, "loss": 0.0196, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13442669808864594, "rewards/margins": 0.0866381973028183, "rewards/rejected": -0.22106489539146423, "step": 3680 }, { "epoch": 0.48, "learning_rate": 1.789047789459375e-08, "logits/chosen": -2.0566253662109375, "logits/rejected": -1.982370376586914, "logps/chosen": -370.77777099609375, "logps/rejected": -444.1426696777344, "loss": 0.0331, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12508869171142578, "rewards/margins": 0.08249789476394653, "rewards/rejected": -0.20758657157421112, "step": 3690 }, { "epoch": 0.48, "learning_rate": 1.5266035279088708e-08, "logits/chosen": -2.011000156402588, "logits/rejected": -1.8373658657073975, "logps/chosen": -375.22113037109375, "logps/rejected": -472.55938720703125, "loss": 0.0291, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10299670696258545, "rewards/margins": 0.09758211672306061, "rewards/rejected": -0.20057880878448486, "step": 3700 }, { "epoch": 0.49, "learning_rate": 1.2849067234584623e-08, "logits/chosen": -1.8899952173233032, "logits/rejected": -1.6640405654907227, "logps/chosen": -491.42449951171875, "logps/rejected": -550.1334228515625, "loss": 0.0181, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1326027661561966, "rewards/margins": 0.10131551325321198, "rewards/rejected": -0.23391827940940857, "step": 3710 }, { "epoch": 0.49, "learning_rate": 1.0639775577218625e-08, "logits/chosen": -1.8999922275543213, "logits/rejected": -1.6849126815795898, "logps/chosen": -385.58636474609375, "logps/rejected": -559.1898803710938, "loss": 0.0126, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.123233363032341, "rewards/margins": 0.1539316475391388, "rewards/rejected": -0.2771649956703186, "step": 3720 }, { "epoch": 0.49, "learning_rate": 8.638344782207486e-09, "logits/chosen": -1.5874528884887695, "logits/rejected": -1.6966588497161865, "logps/chosen": -372.178466796875, "logps/rejected": -495.927978515625, "loss": 0.0211, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14569786190986633, "rewards/margins": 0.08861680328845978, "rewards/rejected": -0.23431463539600372, "step": 3730 }, { "epoch": 0.49, "learning_rate": 6.84494196844715e-09, "logits/chosen": -2.0879836082458496, "logits/rejected": -1.895554780960083, "logps/chosen": -408.32940673828125, "logps/rejected": -429.34576416015625, "loss": 0.0173, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11463910341262817, "rewards/margins": 0.04446711391210556, "rewards/rejected": -0.15910622477531433, "step": 3740 }, { "epoch": 0.49, "learning_rate": 5.259716884556121e-09, "logits/chosen": -1.798561692237854, "logits/rejected": -1.634193778038025, "logps/chosen": -404.978515625, "logps/rejected": -424.12158203125, "loss": 0.0253, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12460513412952423, "rewards/margins": 0.06428088247776031, "rewards/rejected": -0.18888600170612335, "step": 3750 }, { "epoch": 0.49, "learning_rate": 3.882801896372967e-09, "logits/chosen": -1.8340867757797241, "logits/rejected": -1.459303855895996, "logps/chosen": -425.59979248046875, "logps/rejected": -513.0728149414062, "loss": 0.0228, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12816472351551056, "rewards/margins": 0.18685688078403473, "rewards/rejected": -0.3150216042995453, "step": 3760 }, { "epoch": 0.49, "learning_rate": 2.7143119759026614e-09, "logits/chosen": -1.8362566232681274, "logits/rejected": -1.856183409690857, "logps/chosen": -384.69287109375, "logps/rejected": -546.099853515625, "loss": 0.0237, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1406644880771637, "rewards/margins": 0.10808048397302628, "rewards/rejected": -0.24874496459960938, "step": 3770 }, { "epoch": 0.49, "learning_rate": 1.754344691717591e-09, "logits/chosen": -1.9808629751205444, "logits/rejected": -1.9746452569961548, "logps/chosen": -337.6439514160156, "logps/rejected": -460.733154296875, "loss": 0.0224, "rewards/accuracies": 0.625, "rewards/chosen": -0.1251327097415924, "rewards/margins": 0.07667910307645798, "rewards/rejected": -0.20181182026863098, "step": 3780 }, { "epoch": 0.5, "learning_rate": 1.0029802008096335e-09, "logits/chosen": -1.752171277999878, "logits/rejected": -1.8375740051269531, "logps/chosen": -328.9490661621094, "logps/rejected": -415.68621826171875, "loss": 0.0203, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1352800726890564, "rewards/margins": 0.055338628590106964, "rewards/rejected": -0.19061870872974396, "step": 3790 }, { "epoch": 0.5, "learning_rate": 4.602812418974534e-10, "logits/chosen": -1.748002052307129, "logits/rejected": -1.7915000915527344, "logps/chosen": -360.3035583496094, "logps/rejected": -454.4839782714844, "loss": 0.0178, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13218410313129425, "rewards/margins": 0.06466977298259735, "rewards/rejected": -0.1968538761138916, "step": 3800 }, { "epoch": 0.5, "learning_rate": 1.2629313018819312e-10, "logits/chosen": -1.8526389598846436, "logits/rejected": -1.6567462682724, "logps/chosen": -397.4435729980469, "logps/rejected": -475.48004150390625, "loss": 0.0205, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1153927594423294, "rewards/margins": 0.0929543524980545, "rewards/rejected": -0.2083471268415451, "step": 3810 }, { "epoch": 0.5, "learning_rate": 1.0437535929996855e-12, "logits/chosen": -1.8983266353607178, "logits/rejected": -1.7345978021621704, "logps/chosen": -387.1775817871094, "logps/rejected": -436.37188720703125, "loss": 0.0229, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12952563166618347, "rewards/margins": 0.08524824678897858, "rewards/rejected": -0.21477389335632324, "step": 3820 }, { "epoch": 0.5, "step": 3821, "total_flos": 0.0, "train_loss": 0.019872910955136467, "train_runtime": 29928.2049, "train_samples_per_second": 1.021, "train_steps_per_second": 0.128 } ], "logging_steps": 10, "max_steps": 3821, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }